about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author [Libravatar] cel 🌸 <cel@bunny.garden> 2024-10-31 20:45:46 +0000
committer [Libravatar] cel 🌸 <cel@bunny.garden> 2024-10-31 20:45:46 +0000
commit c41da2c89e2c188d9238b2c3e1555ed81335c81d (patch)
tree 353d39b9f9191847c63f389485d1af36f30cdf3c
parent 6b471061157ee1873d7ac4f3e30cd501d27dcb5a (diff)
download peanuts-c41da2c89e2c188d9238b2c3e1555ed81335c81d.tar.gz
peanuts-c41da2c89e2c188d9238b2c3e1555ed81335c81d.tar.bz2
peanuts-c41da2c89e2c188d9238b2c3e1555ed81335c81d.zip
refactor parsers into Parser trait implementations
-rw-r--r-- src/xml/mod.rs 1240
-rw-r--r-- src/xml/parsers.rs 1502
2 files changed, 1653 insertions, 1089 deletions
diff --git a/src/xml/mod.rs b/src/xml/mod.rs
index 47c1779..b1d6ea5 100644
--- a/src/xml/mod.rs
+++ b/src/xml/mod.rs
@@ -1,185 +1,92 @@
use std::char;
-use nom::{
- branch::alt,
- bytes::streaming::{is_a, tag, take, take_while},
- character::{
- complete::one_of,
- streaming::{char, none_of, satisfy},
- },
- combinator::{map, not, opt, peek, recognize, value},
- error::{Error, ErrorKind},
- multi::{many0, many1, many_till},
- sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
- Err, IResult, Parser,
-};
-
-// parser: parses tokens from lexer into events
-// no well formedness, validity, or data model, simple translation of input into rust types
-// output is a rust representation of the input xml
-// types could be used for xml production too?
-
mod parsers;
+/// [1] NSAttName ::= PrefixedAttName | DefaultAttName
#[derive(Clone, Debug)]
pub enum NSAttName<'s> {
PrefixedAttName(PrefixedAttName<'s>),
DefaultAttName,
}
-/// [1] NSAttName ::= PrefixedAttName | DefaultAttName
-pub fn ns_att_name(input: &str) -> IResult<&str, NSAttName> {
- alt((
- map(prefixed_att_name, |prefixed_att_name| {
- NSAttName::PrefixedAttName(prefixed_att_name)
- }),
- value(NSAttName::DefaultAttName, default_att_name),
- ))(input)
-}
+/// [2] PrefixedAttName ::= 'xmlns:' NCName
#[derive(Clone, Debug)]
pub struct PrefixedAttName<'s>(NCName<'s>);
-/// [2] PrefixedAttName ::= 'xmlns:' NCName
-pub fn prefixed_att_name(input: &str) -> IResult<&str, PrefixedAttName> {
- map(preceded(tag("xmlns:"), nc_name), |nc_name| {
- PrefixedAttName(nc_name)
- })(input)
-}
+/// [3] DefaultAttName ::= 'xmlns'
#[derive(Clone, Debug)]
pub struct DefaultAttName;
-/// [3] DefaultAttName ::= 'xmlns';
-pub fn default_att_name(input: &str) -> IResult<&str, DefaultAttName> {
- value(DefaultAttName, tag("xmlns"))(input)
-}
+/// [4] NCName ::= Name - (Char* ':' Char*)
#[derive(Clone, Debug)]
pub struct NCName<'s>(&'s str);
-/// [4] NCName ::= Name - (Char* ':' Char*)
-pub fn nc_name(input: &str) -> IResult<&str, NCName> {
- map(
- recognize(pair(
- recognize(name_start_char).and_then(satisfy(|c| c != ':')),
- many_till(name_char, peek(char(':'))),
- )),
- |nc_name| NCName(nc_name),
- )(input)
-}
+/// [7] QName ::= PrefixedName | UnprefixedName
#[derive(Clone, Debug)]
pub enum QName<'s> {
PrefixedName(PrefixedName<'s>),
UnprefixedName(UnprefixedName<'s>),
}
-/// [7] QName ::= PrefixedName | UnprefixedName
-pub fn q_name(input: &str) -> IResult<&str, QName> {
- alt((
- map(prefixed_name, |prefixed_name| {
- QName::PrefixedName(prefixed_name)
- }),
- map(unprefixed_name, |unprefixed_name| {
- QName::UnprefixedName(unprefixed_name)
- }),
- ))(input)
-}
+/// [8] PrefixedName ::= Prefix ':' LocalPart
#[derive(Clone, Debug)]
pub struct PrefixedName<'s> {
prefix: Prefix<'s>,
local_part: LocalPart<'s>,
}
-/// [8] PrefixedName ::= Prefix ':' LocalPart
-pub fn prefixed_name(input: &str) -> IResult<&str, PrefixedName> {
- map(
- separated_pair(prefix, char(':'), local_part),
- |(prefix, local_part)| PrefixedName { prefix, local_part },
- )(input)
-}
+/// [9] UnprefixedName ::= LocalPart
#[derive(Clone, Debug)]
pub struct UnprefixedName<'s>(LocalPart<'s>);
-/// [9] UnprefixedName ::= LocalPart
-pub fn unprefixed_name(input: &str) -> IResult<&str, UnprefixedName> {
- map(local_part, |local_part| UnprefixedName(local_part))(input)
-}
+/// [10] Prefix ::= NCName
#[derive(Clone, Debug)]
pub struct Prefix<'s>(NCName<'s>);
-/// [10] Prefix ::= NCName
-pub fn prefix(input: &str) -> IResult<&str, Prefix> {
- map(nc_name, |nc_name| Prefix(nc_name))(input)
-}
+/// [11] LocalPart ::= NCName
#[derive(Clone, Debug)]
pub struct LocalPart<'s>(NCName<'s>);
-/// [11] LocalPart ::= NCName
-pub fn local_part(input: &str) -> IResult<&str, LocalPart> {
- map(nc_name, |nc_name| LocalPart(nc_name))(input)
-}
// xml spec
-pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [1] document ::= prolog element Misc*
-pub fn document(input: &str) -> IResult<&str, Document> {
- tuple((prolog, element, many0(misc)))(input)
-}
+pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
-pub type Char = char;
/// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
-pub fn xmlchar(input: &str) -> IResult<&str, Char> {
- satisfy(
- |c| matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}'),
- )(input)
-}
+#[repr(transparent)]
+pub struct Char(char);
-pub type S<'s> = &'s str;
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
-pub fn s(input: &str) -> IResult<&str, S> {
- is_a("\u{20}\u{9}\u{D}\u{A}")(input)
-}
+#[repr(transparent)]
+pub struct S<'s>(&'s str);
-pub type NameStartChar = char;
/// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
-pub fn name_start_char(input: &str) -> IResult<&str, NameStartChar> {
- satisfy(
- |c| matches!(c, ':' | 'A'..='Z' | '_' | 'a'..='z' | '\u{C0}'..='\u{D6}' | '\u{D8}'..='\u{F6}' | '\u{F8}'..='\u{2FF}' | '\u{370}'..='\u{37D}' | '\u{37F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}'),
- )(input)
-}
+#[repr(transparent)]
+pub struct NameStartChar(char);
-pub type NameChar = char;
/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
-pub fn name_char(input: &str) -> IResult<&str, NameChar> {
- alt((
- name_start_char,
- satisfy(
- |c| matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'),
- ),
- ))(input)
-}
+#[repr(transparent)]
+pub struct NameChar(char);
-pub type Name<'s> = &'s str;
/// [5] Name ::= NameStartChar (NameChar)*
-pub fn name(input: &str) -> IResult<&str, Name> {
- recognize(pair(name_start_char, many0(name_char)))(input)
-}
+#[derive(Debug, Clone, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct Name<'s>(&'s str);
-pub type Names<'s> = &'s str;
/// [6] Names ::= Name (#x20 Name)*
-pub fn names(input: &str) -> IResult<&str, Names> {
- recognize(pair(name, many0(pair(char('\u{20}'), name))))(input)
-}
+#[repr(transparent)]
+// TODO: turn into vec
+pub struct Names<'s>(&'s str);
-pub type Nmtoken<'s> = &'s str;
/// [7] Nmtoken ::= (NameChar)+
-pub fn nmtoken(input: &str) -> IResult<&str, Nmtoken> {
- recognize(many1(name_char))(input)
-}
+#[derive(Debug, Clone)]
+#[repr(transparent)]
+pub struct Nmtoken<'s>(&'s str);
-pub type Nmtokens<'s> = &'s str;
/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
-pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {
- recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)
-}
+#[repr(transparent)]
+// TODO: turn into vec
+pub struct Nmtokens<'s>(&'s str);
#[derive(Clone, Debug)]
pub enum LiteralData<'s> {
@@ -187,275 +94,103 @@ pub enum LiteralData<'s> {
PEReference(PEReference<'s>),
Reference(Reference<'s>),
}
-
-pub type EntityValue<'s> = Vec<LiteralData<'s>>;
/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
/// | "'" ([^%&'] | PEReference | Reference)* "'"
-pub fn entity_value(input: &str) -> IResult<&str, EntityValue> {
- alt((
- delimited(
- char('"'),
- many0(alt((
- map(
- recognize(many_till(take(1usize), peek(one_of("%&\"")))),
- |string| LiteralData::String(string),
- ),
- map(pe_reference, |pe_reference| {
- LiteralData::PEReference(pe_reference)
- }),
- map(reference, |reference| LiteralData::Reference(reference)),
- ))),
- char('"'),
- ),
- delimited(
- char('\''),
- many0(alt((
- map(
- recognize(many_till(take(1usize), peek(one_of("%&'")))),
- |string| LiteralData::String(string),
- ),
- map(pe_reference, |pe_reference| {
- LiteralData::PEReference(pe_reference)
- }),
- map(reference, |reference| LiteralData::Reference(reference)),
- ))),
- char('\''),
- ),
- ))(input)
-}
+#[derive(Debug)]
+#[repr(transparent)]
+pub struct EntityValue<'s>(Vec<LiteralData<'s>>);
-pub type AttValue<'s> = Vec<LiteralData<'s>>;
/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
/// | "'" ([^<&'] | Reference)* "'"
-pub fn att_value(input: &str) -> IResult<&str, AttValue> {
- alt((
- delimited(
- char('"'),
- many0(alt((
- map(
- recognize(many_till(take(1usize), peek(one_of("%&\"")))),
- |string| LiteralData::String(string),
- ),
- map(reference, |reference| LiteralData::Reference(reference)),
- ))),
- char('"'),
- ),
- delimited(
- char('\''),
- many0(alt((
- map(
- recognize(many_till(take(1usize), peek(one_of("%&'")))),
- |string| LiteralData::String(string),
- ),
- map(reference, |reference| LiteralData::Reference(reference)),
- ))),
- char('\''),
- ),
- ))(input)
-}
+#[derive(Clone, Debug)]
+#[repr(transparent)]
+pub struct AttValue<'s>(Vec<LiteralData<'s>>);
-pub type SystemLiteral<'s> = &'s str;
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
-pub fn system_literal(input: &str) -> IResult<&str, SystemLiteral> {
- alt((
- delimited(char('"'), recognize(many0(none_of("\""))), char('"')),
- delimited(char('\''), recognize(many0(none_of("'"))), char('\'')),
- ))(input)
-}
+#[derive(Debug)]
+#[repr(transparent)]
+pub struct SystemLiteral<'s>(&'s str);
-pub type PubidLiteral<'s> = &'s str;
/// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
-pub fn pubid_literal(input: &str) -> IResult<&str, PubidLiteral> {
- alt((
- delimited(char('"'), recognize(many0(pubid_char)), char('"')),
- delimited(
- char('\''),
- recognize(many0(recognize(not(char('\''))).and_then(pubid_char))),
- char('\''),
- ),
- ))(input)
-}
+#[derive(Debug)]
+#[repr(transparent)]
+pub struct PubidLiteral<'s>(&'s str);
-pub type PubidChar<'s> = char;
/// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
-pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> {
- satisfy(|c| matches!(c, '\u{20}' | '\u{D}' | '\u{A}' | 'a'..='z' | 'A'..='Z' | '0'..='9'))(
- input,
- )
-}
+#[repr(transparent)]
+pub struct PubidChar(char);
-pub type CharData<'s> = &'s str;
/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
-pub fn char_data(input: &str) -> IResult<&str, CharData> {
- recognize(many_till(
- none_of("<&"),
- peek(alt((recognize(one_of("<&")), tag("]]>")))),
- ))(input)
-
- // let tagg: &str;
- // if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) {
- // if let Ok((_, tagg2)) =
- // peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input)
- // {
- // if tagg1.len() < tagg2.len() {
- // tagg = tagg1
- // } else {
- // tagg = tagg2
- // }
- // } else {
- // tagg = tagg1;
- // }
- // } else {
- // (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)?
- // }
- // tag(tagg)(input)
-
- // recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input)
- // recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input)
- // take_till(|c| c == '<' || c == '&').and_then(take_until("]]>"))(input)
-}
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct CharData<'s>(&'s str);
-pub type Comment<'s> = &'s str;
-/// Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
-pub fn comment(input: &str) -> IResult<&str, Comment> {
- delimited(
- tag("<!--"),
- recognize(many_till(xmlchar, peek(tag("--")))),
- tag("-->"),
- )(input)
-}
+/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct Comment<'s>(&'s str);
+/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
#[derive(Clone, Debug)]
pub struct PI<'s> {
- target: &'s str,
+ target: PITarget<'s>,
instruction: Option<&'s str>,
}
-/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
-pub fn pi(input: &str) -> IResult<&str, PI> {
- map(
- delimited(
- tag("<?"),
- pair(
- pi_target,
- opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>")))))),
- ),
- tag("?>"),
- ),
- |(target, instruction)| PI {
- target,
- instruction,
- },
- )(input)
-}
-pub type PITarget<'s> = &'s str;
/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
-pub fn pi_target(input: &str) -> IResult<&str, PITarget> {
- let (rest, name) = name(input)?;
- if name.to_lowercase() == "xml" {
- return Err(Err::Error(Error {
- input,
- // TODO: check if better error to return
- code: ErrorKind::Tag,
- }));
- } else {
- return Ok((rest, name));
- }
-}
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct PITarget<'s>(Name<'s>);
-pub type CDSect<'s> = (CDStart<'s>, CData<'s>, CDEnd<'s>);
/// [18] CDSect ::= CDStart CData CDEnd
-pub fn cd_sect(input: &str) -> IResult<&str, CDSect> {
- tuple((cd_start, cdata, cd_end))(input)
-}
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct CDSect<'s>(CData<'s>);
-pub type CDStart<'s> = &'s str;
/// [19] CDStart ::= '<![CDATA['
-pub fn cd_start(input: &str) -> IResult<&str, CDStart> {
- tag("<![CDATA[")(input)
-}
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct CDStart;
-pub type CData<'s> = &'s str;
/// [20] CData ::= (Char* - (Char* ']]>' Char*))
-pub fn cdata(input: &str) -> IResult<&str, CData> {
- recognize(many_till(xmlchar, peek(tag("]]>"))))(input)
-}
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct CData<'s>(&'s str);
-pub type CDEnd<'s> = &'s str;
/// [21] CDEnd ::= ']]>'
-pub fn cd_end(input: &str) -> IResult<&str, CDEnd> {
- tag("]]>")(input)
-}
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct CDEnd;
+/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
pub type Prolog<'s> = (
Option<XMLDecl<'s>>,
Vec<Misc<'s>>,
Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,
);
-/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
-pub fn prolog(input: &str) -> IResult<&str, Prolog> {
- tuple((
- opt(xml_decl),
- many0(misc),
- opt(tuple((doctypedecl, many0(misc)))),
- ))(input)
-}
+/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
#[derive(Debug)]
pub struct XMLDecl<'s> {
version_info: VersionInfo,
encoding_decl: Option<EncodingDecl<'s>>,
sd_decl: Option<SDDecl>,
}
-/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
-pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {
- map(
- delimited(
- tag("<?xml"),
- tuple((version_info, opt(encoding_decl), opt(sd_decl))),
- pair(opt(s), tag("?>")),
- ),
- |(version_info, encoding_decl, sd_decl)| XMLDecl {
- version_info,
- encoding_decl,
- sd_decl,
- },
- )(input)
-}
-pub type VersionInfo = VersionNum;
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
-pub fn version_info(input: &str) -> IResult<&str, VersionInfo> {
- preceded(
- tuple((s, tag("version"), eq)),
- alt((
- delimited(char('\''), version_num, char('\'')),
- delimited(char('"'), version_num, char('"')),
- )),
- )(input)
-}
+#[derive(Debug)]
+pub struct VersionInfo(VersionNum);
/// [25] Eq ::= S? '=' S?
-pub fn eq(input: &str) -> IResult<&str, &str> {
- recognize(tuple((opt(s), char('='), opt(s))))(input)
-}
+#[derive(Clone)]
+pub struct Eq;
+/// [26] VersionNum ::= '1.' [0-9]+
#[derive(Clone, Debug)]
pub enum VersionNum {
One,
OneDotOne,
}
-/// [26] VersionNum ::= '1.' [0-9]+
-pub fn version_num(input: &str) -> IResult<&str, VersionNum> {
- preceded(
- tag("1."),
- alt((
- value(VersionNum::One, char('0')),
- value(VersionNum::OneDotOne, char('1')),
- )),
- )(input)
-}
+/// [27] Misc ::= Comment | PI | S
#[derive(Clone, Debug)]
pub enum Misc<'s> {
Comment(Comment<'s>),
@@ -463,80 +198,33 @@ pub enum Misc<'s> {
// TODO: how to deal with whitespace
S,
}
-/// [27] Misc ::= Comment | PI | S
-pub fn misc(input: &str) -> IResult<&str, Misc> {
- alt((
- map(comment, |comment| Misc::Comment(comment)),
- map(pi, |pi| Misc::PI(pi)),
- value(Misc::S, s),
- ))(input)
-}
+/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
+/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
#[derive(Debug)]
pub struct DoctypeDecl<'s> {
name: QName<'s>,
external_id: Option<ExternalID<'s>>,
int_subset: Option<IntSubset<'s>>,
}
-/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
-/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
-pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {
- map(
- delimited(
- pair(tag("<!DOCTYPE"), s),
- tuple((
- q_name,
- opt(preceded(s, external_id)),
- preceded(
- opt(s),
- opt(terminated(
- delimited(tag("["), int_subset, tag("]")),
- opt(s),
- )),
- ),
- )),
- tag(">"),
- ),
- |(name, external_id, int_subset)| DoctypeDecl {
- name,
- external_id,
- int_subset,
- },
- )(input)
-}
+/// [28a] DeclSep ::= PEReference | S
#[derive(Clone, Debug)]
pub enum DeclSep<'s> {
PEReference(PEReference<'s>),
// TODO: tackle whitespace
S,
}
-/// [28a] DeclSep ::= PEReference | S
-pub fn decl_sep(input: &str) -> IResult<&str, DeclSep> {
- alt((
- map(pe_reference, |pe_reference| {
- DeclSep::PEReference(pe_reference)
- }),
- value(DeclSep::S, s),
- ))(input)
-}
#[derive(Debug)]
pub enum IntSubsetDeclaration<'s> {
MarkupDecl(MarkupDecl<'s>),
DeclSep(DeclSep<'s>),
}
-type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>;
/// [28b] intSubset ::= (markupdecl | DeclSep)*
-pub fn int_subset(input: &str) -> IResult<&str, IntSubset> {
- many0(alt((
- map(markup_decl, |markup_decl| {
- IntSubsetDeclaration::MarkupDecl(markup_decl)
- }),
- map(decl_sep, |decl_sep| IntSubsetDeclaration::DeclSep(decl_sep)),
- )))(input)
-}
+pub type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>;
+/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
#[derive(Debug)]
pub enum MarkupDecl<'s> {
Elementdecl(Elementdecl<'s>),
@@ -546,117 +234,44 @@ pub enum MarkupDecl<'s> {
PI(PI<'s>),
Comment(Comment<'s>),
}
-/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
-pub fn markup_decl(input: &str) -> IResult<&str, MarkupDecl> {
- alt((
- map(elementdecl, |elementdecl| {
- MarkupDecl::Elementdecl(elementdecl)
- }),
- map(attlist_decl, |attlist_decl| {
- MarkupDecl::AttlistDecl(attlist_decl)
- }),
- map(entity_decl, |entity_decl| {
- MarkupDecl::EntityDecl(entity_decl)
- }),
- map(notation_decl, |notation_decl| {
- MarkupDecl::NotationDecl(notation_decl)
- }),
- map(pi, |pi| MarkupDecl::PI(pi)),
- map(comment, |comment| MarkupDecl::Comment(comment)),
- ))(input)
-}
+/// [30] extSubset ::= TextDecl? extSubsetDecl
pub struct ExtSubset<'s> {
text_decl: Option<TextDecl<'s>>,
ext_subset_decl: ExtSubsetDecl<'s>,
}
-/// [30] extSubset ::= TextDecl? extSubsetDecl
-pub fn ext_subset(input: &str) -> IResult<&str, ExtSubset> {
- map(
- pair(opt(text_decl), ext_subset_decl),
- |(text_decl, ext_subset_decl)| ExtSubset {
- text_decl,
- ext_subset_decl,
- },
- )(input)
-}
pub enum ExtSubsetDeclaration<'s> {
MarkupDecl(MarkupDecl<'s>),
ConditionalSect(ConditionalSect<'s>),
DeclSep(DeclSep<'s>),
}
-type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>;
/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
-pub fn ext_subset_decl(input: &str) -> IResult<&str, ExtSubsetDecl> {
- many0(alt((
- map(markup_decl, |markup_decl| {
- ExtSubsetDeclaration::MarkupDecl(markup_decl)
- }),
- map(conditional_sect, |conditional_sect| {
- ExtSubsetDeclaration::ConditionalSect(conditional_sect)
- }),
- map(decl_sep, |decl_sep| ExtSubsetDeclaration::DeclSep(decl_sep)),
- )))(input)
-}
+type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>;
-pub type SDDecl = bool;
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
-pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
- preceded(
- tuple((s, tag("standalone"), eq)),
- alt((
- delimited(
- char('\''),
- alt((value(true, tag("yes")), value(false, tag("no")))),
- char('\''),
- ),
- delimited(
- char('"'),
- alt((value(true, tag("yes")), value(false, tag("no")))),
- char('"'),
- ),
- )),
- )(input)
-}
+pub type SDDecl = bool;
// (Productions 33 through 38 have been removed.)
+/// [39] element ::= EmptyElemTag | STag content ETag
#[derive(Debug, Clone)]
pub enum Element<'s> {
Empty(EmptyElemTag<'s>),
NotEmpty(STag<'s>, Content<'s>, ETag<'s>),
}
-/// [39] element ::= EmptyElemTag | STag content ETag
-pub fn element(input: &str) -> IResult<&str, Element> {
- alt((
- map(empty_elem_tag, |empty_elem_tag| {
- Element::Empty(empty_elem_tag)
- }),
- map(tuple((s_tag, content, e_tag)), |(s_tag, content, e_tag)| {
- Element::NotEmpty(s_tag, content, e_tag)
- }),
- ))(input)
-}
+/// [12] STag ::= '<' QName (S Attribute)* S? '>'
+/// [40] STag ::= '<' Name (S Attribute)* S? '>'
#[derive(Debug, Clone)]
pub struct STag<'s> {
name: QName<'s>,
attributes: Vec<Attribute<'s>>,
}
-/// [12] STag ::= '<' QName (S Attribute)* S? '>'
-/// [40] STag ::= '<' Name (S Attribute)* S? '>'
-pub fn s_tag(input: &str) -> IResult<&str, STag> {
- map(
- delimited(
- tag("<"),
- pair(q_name, many0(preceded(s, attribute))),
- pair(opt(s), tag(">")),
- ),
- |(name, attributes)| STag { name, attributes },
- )(input)
-}
+/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
+// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
+/// [41] Attribute ::= Name Eq AttValue
#[derive(Debug, Clone)]
pub enum Attribute<'s> {
NamespaceDeclaration {
@@ -668,36 +283,13 @@ pub enum Attribute<'s> {
value: AttValue<'s>,
},
}
-/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
-pub fn attribute(input: &str) -> IResult<&str, Attribute> {
- alt((
- map(
- separated_pair(ns_att_name, eq, att_value),
- |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value },
- ),
- map(separated_pair(q_name, eq, att_value), |(name, value)| {
- Attribute::Attribute { name, value }
- }),
- ))(input)
-}
-// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
-/// [41] Attribute ::= Name Eq AttValue
-// pub fn attribute(input: &str) -> IResult<&str, Attribute> {
-// separated_pair(name, eq, att_value)(input)
-// }
+/// [13] ETag ::= '</' QName S? '>'
+/// [42] ETag ::= '</' Name S? '>'
#[derive(Debug, Clone)]
pub struct ETag<'s> {
name: QName<'s>,
}
-/// [13] ETag ::= '</' QName S? '>'
-/// [42] ETag ::= '</' Name S? '>'
-pub fn e_tag(input: &str) -> IResult<&str, ETag> {
- map(
- delimited(tag("</"), q_name, pair(opt(s), tag(">"))),
- |name| ETag { name },
- )(input)
-}
#[derive(Debug, Clone)]
pub enum ContentItem<'s> {
@@ -708,68 +300,31 @@ pub enum ContentItem<'s> {
PI(PI<'s>),
Comment(Comment<'s>),
}
+/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
#[derive(Debug, Clone)]
pub struct Content<'s> {
char_data: Option<CharData<'s>>,
content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
}
-/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
-pub fn content(input: &str) -> IResult<&str, Content> {
- map(
- pair(
- opt(char_data),
- many0(pair(
- alt((
- map(element, |element| ContentItem::Element(element)),
- map(reference, |reference| ContentItem::Reference(reference)),
- map(cd_sect, |cd_sect| ContentItem::CDSect(cd_sect)),
- map(pi, |pi| ContentItem::PI(pi)),
- map(comment, |comment| ContentItem::Comment(comment)),
- )),
- opt(char_data),
- )),
- ),
- |(char_data, content)| Content { char_data, content },
- )(input)
-}
+/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
+/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
#[derive(Debug, Clone)]
pub struct EmptyElemTag<'s> {
name: QName<'s>,
attributes: Vec<Attribute<'s>>,
}
-/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
-/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
-pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
- map(
- delimited(
- tag("<"),
- pair(q_name, many0(preceded(s, attribute))),
- pair(opt(s), tag("/>")),
- ),
- |(name, attributes)| EmptyElemTag { name, attributes },
- )(input)
-}
+/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
+/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
#[derive(Debug)]
pub struct Elementdecl<'s> {
name: QName<'s>,
contentspec: Contentspec<'s>,
}
-/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
-/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
-pub fn elementdecl(input: &str) -> IResult<&str, Elementdecl> {
- map(
- delimited(
- pair(tag("<!ELEMENT"), s),
- separated_pair(q_name, s, contentspec),
- pair(opt(s), tag(">")),
- ),
- |(name, contentspec)| Elementdecl { name, contentspec },
- )(input)
-}
// TODO: casings???
+/// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
#[derive(Clone, Debug)]
pub enum Contentspec<'s> {
Empty,
@@ -777,16 +332,8 @@ pub enum Contentspec<'s> {
Mixed(Mixed<'s>),
Children(Children<'s>),
}
-/// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
-pub fn contentspec(input: &str) -> IResult<&str, Contentspec> {
- alt((
- value(Contentspec::Empty, tag("EMPTY")),
- value(Contentspec::Any, tag("ANY")),
- map(mixed, |mixed| Contentspec::Mixed(mixed)),
- map(children, |children| Contentspec::Children(children)),
- ))(input)
-}
+/// Occurence ::= ('?' | '*' | '+')?
#[derive(Clone, Debug)]
pub enum Occurence {
Once,
@@ -794,46 +341,18 @@ pub enum Occurence {
Many0,
Many1,
}
-/// Occurence ::= ('?' | '*' | '+')?
-pub fn occurence(input: &str) -> IResult<&str, Occurence> {
- map(
- opt(alt((tag("?"), tag("*"), tag("+")))),
- |occurence| match occurence {
- Some("?") => Occurence::Optional,
- Some("*") => Occurence::Many0,
- Some("+") => Occurence::Many1,
- _ => Occurence::Once,
- },
- )(input)
-}
#[derive(Clone, Debug)]
pub enum ChildrenKind<'s> {
Choice(Choice<'s>),
Seq(Seq<'s>),
}
+/// [47] children ::= (choice | seq) ('?' | '*' | '+')?
#[derive(Clone, Debug)]
pub struct Children<'s> {
kind: ChildrenKind<'s>,
occurence: Occurence,
}
-/// [47] children ::= (choice | seq) ('?' | '*' | '+')?
-pub fn children(input: &str) -> IResult<&str, Children> {
- map(
- pair(
- alt((
- map(choice, |choice| ChildrenKind::Choice(choice)),
- map(seq, |seq| ChildrenKind::Seq(seq)),
- )),
- occurence,
- ),
- |(kind, occurence)| Children { kind, occurence },
- )(input)
- // alt((
- // map(pair(choice, occurence), |(choice, occurence)| Children::Choice(choice, occurence)),
- // map(pair(seq, occurence), |(seq, occurence)| Children::Seq(seq, occurence))
- // ))(input)
-}
#[derive(Clone, Debug)]
pub enum CpKind<'s> {
@@ -841,165 +360,63 @@ pub enum CpKind<'s> {
Choice(Choice<'s>),
Seq(Seq<'s>),
}
+/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')?
+/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
#[derive(Clone, Debug)]
pub struct Cp<'s> {
kind: CpKind<'s>,
occurence: Occurence,
}
-/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')?
-/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
-pub fn cp(input: &str) -> IResult<&str, Cp> {
- map(
- pair(
- alt((
- map(q_name, |name| CpKind::Name(name)),
- map(choice, |choice| CpKind::Choice(choice)),
- map(seq, |seq| CpKind::Seq(seq)),
- )),
- occurence,
- ),
- |(kind, occurence)| Cp { kind, occurence },
- )(input)
-}
+/// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
#[derive(Clone, Debug)]
pub struct Choice<'s>(Vec<Cp<'s>>);
-/// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
-pub fn choice(input: &str) -> IResult<&str, Choice> {
- map(
- delimited(
- pair(tag("("), opt(s)),
- pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))),
- pair(opt(s), tag(")")),
- ),
- |(head, tail)| {
- let choice = vec![vec![head], tail].concat();
- Choice(choice)
- },
- )(input)
-}
+/// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
#[derive(Clone, Debug)]
pub struct Seq<'s>(Vec<Cp<'s>>);
-/// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
-pub fn seq(input: &str) -> IResult<&str, Seq> {
- map(
- delimited(
- pair(tag("("), opt(s)),
- pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))),
- pair(opt(s), tag(")")),
- ),
- |(head, tail)| {
- let seq = vec![vec![head], tail].concat();
- Seq(seq)
- },
- )(input)
-}
// always contains #PCDATA
-#[derive(Clone, Debug)]
-pub struct Mixed<'s>(Vec<QName<'s>>);
/// [19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'
/// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
-pub fn mixed(input: &str) -> IResult<&str, Mixed> {
- alt((
- map(
- delimited(
- tuple((tag("("), s, tag("#PCDATA"))),
- many0(preceded(tuple((opt(s), tag("|"), opt(s))), q_name)),
- pair(opt(s), tag(")*")),
- ),
- |names| Mixed(names),
- ),
- value(
- Mixed(Vec::new()),
- tuple((tag("("), opt(s), tag("#PCDATA"), opt(s), tag(")"))),
- ),
- ))(input)
-}
+#[derive(Clone, Debug)]
+pub struct Mixed<'s>(Vec<QName<'s>>);
+/// [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'
+/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
#[derive(Debug)]
pub struct AttlistDecl<'s> {
element_type: QName<'s>,
att_defs: Vec<AttDef<'s>>,
}
-/// [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'
-/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
-pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {
- map(
- delimited(
- pair(tag("<!ATTLIST"), s),
- pair(q_name, many0(att_def)),
- pair(opt(s), tag(">")),
- ),
- |(element_type, att_defs)| AttlistDecl {
- element_type,
- att_defs,
- },
- )(input)
-}
#[derive(Debug)]
pub enum AttDefName<'s> {
QName(QName<'s>),
NSAttName(NSAttName<'s>),
}
+/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
+/// [53] AttDef ::= S Name S AttType S DefaultDecl
#[derive(Debug)]
pub struct AttDef<'s> {
name: AttDefName<'s>,
att_type: AttType<'s>,
default_decl: DefaultDecl<'s>,
}
-/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
-/// [53] AttDef ::= S Name S AttType S DefaultDecl
-pub fn att_def(input: &str) -> IResult<&str, AttDef> {
- map(
- tuple((
- preceded(
- s,
- alt((
- map(q_name, |q_name| AttDefName::QName(q_name)),
- map(ns_att_name, |ns_att_name| {
- AttDefName::NSAttName(ns_att_name)
- }),
- )),
- ),
- preceded(s, att_type),
- preceded(s, default_decl),
- )),
- |(name, att_type, default_decl)| AttDef {
- name,
- att_type,
- default_decl,
- },
- )(input)
-}
+/// [54] AttType ::= StringType | TokenizedType | EnumeratedType
#[derive(Clone, Debug)]
pub enum AttType<'s> {
StringType,
TokenizedType(TokenizedType),
EnumeratedType(EnumeratedType<'s>),
}
-/// [54] AttType ::= StringType | TokenizedType | EnumeratedType
-pub fn att_type(input: &str) -> IResult<&str, AttType> {
- alt((
- value(AttType::StringType, string_type),
- map(tokenized_type, |tokenized_type| {
- AttType::TokenizedType(tokenized_type)
- }),
- map(enumerated_type, |enumerated_type| {
- AttType::EnumeratedType(enumerated_type)
- }),
- ))(input)
-}
-pub type StringType<'s> = &'s str;
/// [55] StringType ::= 'CDATA'
-pub fn string_type(input: &str) -> IResult<&str, StringType> {
- tag("CDATA")(input)
-}
+#[derive(Clone)]
+pub struct StringType;
+/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
#[derive(Clone, Debug)]
pub enum TokenizedType {
ID,
@@ -1010,265 +427,97 @@ pub enum TokenizedType {
NMToken,
NMTokens,
}
-/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
-pub fn tokenized_type(input: &str) -> IResult<&str, TokenizedType> {
- alt((
- value(TokenizedType::ID, tag("ID")),
- // TODO: check if this is required
- // try idrefs first to avoid losing 'S'
- value(TokenizedType::IDRefs, tag("IDREFS")),
- value(TokenizedType::IDRef, tag("IDREF")),
- value(TokenizedType::Entity, tag("ENTITY")),
- value(TokenizedType::Entities, tag("ENTITIES")),
- // same here
- value(TokenizedType::NMTokens, tag("NMTOKENS")),
- value(TokenizedType::NMToken, tag("NMTOKEN")),
- ))(input)
-}
+/// [57] EnumeratedType ::= NotationType | Enumeration
#[derive(Debug, Clone)]
pub enum EnumeratedType<'s> {
NotationType(NotationType<'s>),
Enumeration(Enumeration<'s>),
}
-/// [57] EnumeratedType ::= NotationType | Enumeration
-pub fn enumerated_type(input: &str) -> IResult<&str, EnumeratedType> {
- alt((
- map(notation_type, |notation_type| {
- EnumeratedType::NotationType(notation_type)
- }),
- map(enumeration, |enumeration| {
- EnumeratedType::Enumeration(enumeration)
- }),
- ))(input)
-}
+/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
#[derive(Debug, Clone)]
pub struct NotationType<'s>(Vec<Name<'s>>);
-/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
-pub fn notation_type(input: &str) -> IResult<&str, NotationType> {
- map(
- delimited(
- tuple((tag("NOTATION"), s, tag("("), opt(s))),
- pair(
- name,
- many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)),
- ),
- pair(opt(s), tag(")")),
- ),
- |(head, tail)| {
- let notation_type = vec![vec![head], tail].concat();
- NotationType(notation_type)
- },
- )(input)
-}
+/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
#[derive(Debug, Clone)]
pub struct Enumeration<'s>(Vec<Nmtoken<'s>>);
-/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
-pub fn enumeration(input: &str) -> IResult<&str, Enumeration> {
- map(
- delimited(
- pair(tag("("), opt(s)),
- pair(
- nmtoken,
- many0(preceded(tuple((opt(s), tag("|"), opt(s))), nmtoken)),
- ),
- pair(opt(s), tag(")")),
- ),
- |(head, tail)| {
- let enumeration = vec![vec![head], tail].concat();
- Enumeration(enumeration)
- },
- )(input)
-}
+/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
#[derive(Debug, Clone)]
pub enum DefaultDecl<'s> {
Required,
Implied,
Fixed(AttValue<'s>),
}
-/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
-pub fn default_decl(input: &str) -> IResult<&str, DefaultDecl> {
- alt((
- value(DefaultDecl::Required, tag("#REQUIRED")),
- value(DefaultDecl::Implied, tag("#IMPLIED")),
- map(
- preceded(opt(pair(tag("#FIXED"), s)), att_value),
- |att_value| DefaultDecl::Fixed(att_value),
- ),
- ))(input)
-}
+/// [61] conditionalSect ::= includeSect | ignoreSect
pub enum ConditionalSect<'s> {
IncludeSect(IncludeSect<'s>),
IgnoreSect(IgnoreSect<'s>),
}
-/// [61] conditionalSect ::= includeSect | ignoreSect
-pub fn conditional_sect(input: &str) -> IResult<&str, ConditionalSect> {
- alt((
- map(include_sect, |include_sect| {
- ConditionalSect::IncludeSect(include_sect)
- }),
- map(ignore_sect, |ignore_sect| {
- ConditionalSect::IgnoreSect(ignore_sect)
- }),
- ))(input)
-}
-pub struct IncludeSect<'s>(ExtSubsetDecl<'s>);
/// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
-pub fn include_sect(input: &str) -> IResult<&str, IncludeSect> {
- map(
- delimited(
- tuple((tag("<!["), opt(s), tag("INCLUDE"), opt(s), tag("["))),
- ext_subset_decl,
- tag("]]>"),
- ),
- |ext_subset_decl| IncludeSect(ext_subset_decl),
- )(input)
-}
+pub struct IncludeSect<'s>(ExtSubsetDecl<'s>);
-pub struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);
/// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
-pub fn ignore_sect(input: &str) -> IResult<&str, IgnoreSect> {
- map(
- delimited(
- tuple((tag("<!["), opt(s), tag("IGNORE"), opt(s), tag("["))),
- many0(ignore_sect_contents),
- tag("]]>"),
- ),
- |ignore_sect_contents| IgnoreSect(ignore_sect_contents),
- )(input)
-}
+pub struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);
+/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
pub struct IgnoreSectContents<'s> {
// Ignored text that precedes the first nested `<![ ... ]]>` group (the leading `Ignore`).
ignore: Ignore<'s>,
// One entry per nested `<![ ... ]]>` group: the group's own contents, followed by the
// ignored text that comes after its closing `]]>`.
ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>,
}
-/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
-pub fn ignore_sect_contents(input: &str) -> IResult<&str, IgnoreSectContents> {
- map(
- pair(
- ignore,
- many0(tuple((
- delimited(tag("<!["), ignore_sect_contents, tag("]]>")),
- ignore,
- ))),
- ),
- |(ignore, ignore_list)| IgnoreSectContents {
- ignore,
- ignore_list,
- },
- )(input)
-}
-pub type Ignore<'s> = &'s str;
/// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
-pub fn ignore(input: &str) -> IResult<&str, Ignore> {
- recognize(many_till(xmlchar, peek(alt((tag("<!["), tag("]]>"))))))(input)
-}
+pub struct Ignore<'s>(&'s str);
+/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
#[derive(Clone, Debug)]
pub enum CharRef<'s> {
Decimal(&'s str),
Hexadecimal(&'s str),
}
-/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
-pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
- alt((
- delimited(
- tag("&#"),
- map(take_while(|c| matches!(c, '0'..='9')), |decimal| {
- CharRef::Decimal(decimal)
- }),
- tag(";"),
- ),
- delimited(
- tag("&#x"),
- map(
- take_while(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' )),
- |hexadecimal| CharRef::Hexadecimal(hexadecimal),
- ),
- tag(";"),
- ),
- ))(input)
-}
+/// [67] Reference ::= EntityRef | CharRef
#[derive(Clone, Debug)]
pub enum Reference<'s> {
EntityRef(EntityRef<'s>),
CharRef(CharRef<'s>),
}
-/// [67] Reference ::= EntityRef | CharRef
-pub fn reference(input: &str) -> IResult<&str, Reference> {
- alt((
- map(entity_ref, |entity_ref| Reference::EntityRef(entity_ref)),
- map(char_ref, |char_ref| Reference::CharRef(char_ref)),
- ))(input)
-}
-pub type EntityRef<'s> = &'s str;
/// [68] EntityRef ::= '&' Name ';'
-pub fn entity_ref(input: &str) -> IResult<&str, EntityRef> {
- delimited(tag("&"), name, tag(";"))(input)
-}
+#[derive(Clone, Debug)]
+pub struct EntityRef<'s>(Name<'s>);
-pub type PEReference<'s> = &'s str;
/// [69] PEReference ::= '%' Name ';'
-pub fn pe_reference(input: &str) -> IResult<&str, PEReference> {
- delimited(tag("%"), name, tag(";"))(input)
-}
+#[derive(Clone, Debug)]
+#[repr(transparent)]
+pub struct PEReference<'s>(Name<'s>);
+/// [70] EntityDecl ::= GEDecl | PEDecl
#[derive(Debug)]
pub enum EntityDecl<'s> {
GEDecl(GEDecl<'s>),
PEDecl(PEDecl<'s>),
}
-/// [70] EntityDecl ::= GEDecl | PEDecl
-pub fn entity_decl(input: &str) -> IResult<&str, EntityDecl> {
- alt((
- map(ge_decl, |ge_decl| EntityDecl::GEDecl(ge_decl)),
- map(pe_decl, |pe_decl| EntityDecl::PEDecl(pe_decl)),
- ))(input)
-}
+/// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
#[derive(Debug)]
pub struct GEDecl<'s> {
name: Name<'s>,
entity_def: EntityDef<'s>,
}
-/// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
-pub fn ge_decl(input: &str) -> IResult<&str, GEDecl> {
- map(
- delimited(
- pair(tag("<!ENTITY"), s),
- separated_pair(name, s, entity_def),
- pair(opt(s), tag(">")),
- ),
- |(name, entity_def)| GEDecl { name, entity_def },
- )(input)
-}
+/// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
#[derive(Debug)]
pub struct PEDecl<'s> {
name: Name<'s>,
pe_def: PEDef<'s>,
}
-/// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
-pub fn pe_decl(input: &str) -> IResult<&str, PEDecl> {
- map(
- delimited(
- tuple((tag("<!ENTITY"), s, tag("%"), s)),
- separated_pair(name, s, pe_def),
- pair(opt(s), tag(">")),
- ),
- |(name, pe_def)| PEDecl { name, pe_def },
- )(input)
-}
+/// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
#[derive(Debug)]
pub enum EntityDef<'s> {
EntityValue(EntityValue<'s>),
@@ -1277,249 +526,62 @@ pub enum EntityDef<'s> {
ndata_decl: Option<NDataDecl<'s>>,
},
}
-/// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
-pub fn entity_def(input: &str) -> IResult<&str, EntityDef> {
- alt((
- map(entity_value, |entity_value| {
- EntityDef::EntityValue(entity_value)
- }),
- map(
- pair(external_id, opt(ndata_decl)),
- |(external_id, ndata_decl)| EntityDef::ExternalID {
- external_id,
- ndata_decl,
- },
- ),
- ))(input)
-}
+/// [74] PEDef ::= EntityValue | ExternalID
#[derive(Debug)]
pub enum PEDef<'s> {
EntityValue(EntityValue<'s>),
ExternalID(ExternalID<'s>),
}
-/// [74] PEDef ::= EntityValue | ExternalID
-pub fn pe_def(input: &str) -> IResult<&str, PEDef> {
- alt((
- map(entity_value, |entity_value| {
- PEDef::EntityValue(entity_value)
- }),
- map(external_id, |external_id| PEDef::ExternalID(external_id)),
- ))(input)
-}
+/// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
#[derive(Debug)]
pub enum ExternalID<'s> {
SYSTEM {
- system_identifier: &'s str,
+ system_identifier: SystemLiteral<'s>,
},
PUBLIC {
- public_identifier: &'s str,
- system_identifier: &'s str,
+ public_identifier: PubidLiteral<'s>,
+ system_identifier: SystemLiteral<'s>,
},
}
-/// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
-// pub fn external_id(input: &str) -> IResult<&str, ExternalID> {
-pub fn external_id(input: &str) -> IResult<&str, ExternalID> {
- alt((
- map(
- preceded(pair(tag("SYSTEM"), s), system_literal),
- |system_identifier| ExternalID::SYSTEM { system_identifier },
- ),
- map(
- preceded(
- pair(tag("PUBLIC"), s),
- separated_pair(pubid_literal, s, system_literal),
- ),
- |(public_identifier, system_identifier)| ExternalID::PUBLIC {
- public_identifier,
- system_identifier,
- },
- ),
- ))(input)
-}
-pub type NDataDecl<'s> = &'s str;
/// [76] NDataDecl ::= S 'NDATA' S Name
-pub fn ndata_decl(input: &str) -> IResult<&str, NDataDecl> {
- preceded(tuple((s, tag("NDATA"), s)), name)(input)
-}
+#[derive(Debug)]
+pub struct NDataDecl<'s>(Name<'s>);
+/// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
pub struct TextDecl<'s> {
version_info: Option<VersionInfo>,
encoding_decl: EncodingDecl<'s>,
}
-/// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
-pub fn text_decl(input: &str) -> IResult<&str, TextDecl> {
- map(
- delimited(
- tag("<?xml"),
- pair(opt(version_info), terminated(encoding_decl, opt(s))),
- tag("?>"),
- ),
- |(version_info, encoding_decl)| TextDecl {
- version_info,
- encoding_decl,
- },
- )(input)
-}
+/// [78] extParsedEnt ::= TextDecl? content
pub struct ExtParsedEnt<'s> {
text_decl: Option<TextDecl<'s>>,
content: Content<'s>,
}
-/// [78] extParsedEnt ::= TextDecl? content
-pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> {
- map(pair(opt(text_decl), content), |(text_decl, content)| {
- ExtParsedEnt { text_decl, content }
- })(input)
-}
-pub type EncodingDecl<'s> = EncName<'s>;
/// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
-pub fn encoding_decl(input: &str) -> IResult<&str, EncodingDecl> {
- preceded(
- tuple((s, tag("encoding"), eq)),
- alt((
- delimited(char('"'), enc_name, char('"')),
- delimited(char('\''), enc_name, char('\'')),
- )),
- )(input)
-}
+#[derive(Debug)]
+pub struct EncodingDecl<'s>(EncName<'s>);
-pub type EncName<'s> = &'s str;
/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
-pub fn enc_name(input: &str) -> IResult<&str, EncName> {
- recognize(pair(
- satisfy(|c| matches!(c, 'A'..='Z' | 'a'..='z' )),
- many0(satisfy(
- |c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | '-' ),
- )),
- ))(input)
-}
-
#[derive(Debug)]
-pub struct NotationDecl<'s> {
- name: &'s str,
- id: NotationDeclID<'s>,
-}
+pub struct EncName<'s>(&'s str);
+
#[derive(Debug)]
pub enum NotationDeclID<'s> {
External(ExternalID<'s>),
Public(PublicID<'s>),
}
/// [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
-pub fn notation_decl(input: &str) -> IResult<&str, NotationDecl> {
- map(
- delimited(
- pair(tag("<!NOTATION"), s),
- separated_pair(
- name,
- s,
- alt((
- map(external_id, |external_id| {
- NotationDeclID::External(external_id)
- }),
- map(public_id, |public_id| NotationDeclID::Public(public_id)),
- )),
- ),
- pair(opt(s), tag(">")),
- ),
- |(name, id)| NotationDecl { name, id },
- )(input)
+#[derive(Debug)]
+pub struct NotationDecl<'s> {
+ name: Name<'s>,
+ id: NotationDeclID<'s>,
}
-pub type PublicID<'s> = &'s str;
/// [83] PublicID ::= 'PUBLIC' S PubidLiteral
-pub fn public_id(input: &str) -> IResult<&str, PublicID> {
- preceded(pair(tag("PUBLIC"), s), pubid_literal)(input)
-}
-
-#[cfg(test)]
-mod tests {
- use std::num::NonZero;
-
- use super::*;
-
- #[test]
- fn test_char_data() {
- assert_eq!(Ok(("&def]]>ghi", "abc")), char_data("abc&def]]>ghi"));
- assert_eq!(Ok(("]]>ghi", "abcdef")), char_data("abcdef]]>ghi"));
- assert_eq!(Ok(("&defghi", "abc")), char_data("abc&defghi"));
- assert_eq!(Ok(("]]>def&ghi", "abc")), char_data("abc]]>def&ghi"));
- assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi"));
- assert_eq!(
- Err(Err::Incomplete(nom::Needed::Size(
- NonZero::new(3usize).unwrap()
- ))),
- char_data("abcdefghi")
- );
- }
-
- #[test]
- fn test_comment() {
- assert_eq!(Ok(("", "")), comment("<!---->"));
- assert_eq!(Ok(("", "asdf")), comment("<!--asdf-->"));
- assert_eq!(Ok(("", "as-df")), comment("<!--as-df-->"));
- assert_eq!(
- Err(Err::Incomplete(nom::Needed::Size(
- NonZero::new(2usize).unwrap()
- ))),
- comment("<!--asdf")
- );
- }
-
- #[test]
- fn test_pi_target() {
- assert_eq!(Ok((" ", "asdf")), pi_target("asdf "));
- assert_eq!(Ok((" ", "xmlasdf")), pi_target("xmlasdf "));
- assert_eq!(
- Err(Err::Error(Error {
- input: "xml ",
- code: ErrorKind::Tag
- })),
- pi_target("xml ")
- );
- assert_eq!(
- Err(Err::Error(Error {
- input: "xMl ",
- code: ErrorKind::Tag
- })),
- pi_target("xMl ")
- );
- }
-
- #[test]
- fn test_cd_sect() {
- assert_eq!(
- Ok((
- "",
- ("<![CDATA[", "<greeting>Hello, world!</greeting>", "]]>")
- )),
- cd_sect("<![CDATA[<greeting>Hello, world!</greeting>]]>")
- )
- }
-
- #[test]
- fn test_cd_start() {
- assert_eq!(Ok(("asdf", "<![CDATA[")), cd_start("<![CDATA[asdf"))
- }
-
- #[test]
- fn test_cdata() {
- assert_eq!(Ok(("]]>asdf", "asdf")), cdata("asdf]]>asdf"));
- assert_eq!(
- Ok(("]]>asdf", "<![CDATA[asdf")),
- cdata("<![CDATA[asdf]]>asdf")
- );
- assert_eq!(
- Ok(("]]>asdf", "<greeting>Hello, world!</greeting>")),
- cdata("<greeting>Hello, world!</greeting>]]>asdf")
- )
- }
-
- #[test]
- fn test_cd_end() {
- assert_eq!(Ok(("asdf", "]]>")), cd_end("]]>asdf"))
- }
-}
+#[derive(Debug)]
+pub struct PublicID<'s>(PubidLiteral<'s>);
diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs
index 8b13789..135f5c7 100644
--- a/src/xml/parsers.rs
+++ b/src/xml/parsers.rs
@@ -1 +1,1503 @@
+use nom::{
+ branch::alt,
+ bytes::streaming::{is_a, tag, take, take_while},
+ character::{
+ complete::one_of,
+ streaming::{char, none_of, satisfy},
+ },
+ combinator::{map, not, opt, peek, recognize, value},
+ error::{Error, ErrorKind},
+ multi::{many0, many1, many_till},
+ sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
+ Err, IResult, Parser as NomParser,
+};
+use crate::xml::NSAttName;
+
+use super::{
+ AttDef, AttDefName, AttType, AttValue, AttlistDecl, Attribute, CDEnd, CDSect, CDStart, CData,
+ Char, CharData, CharRef, Children, ChildrenKind, Choice, Comment, ConditionalSect, Content,
+ ContentItem, Contentspec, Cp, CpKind, DeclSep, DefaultAttName, DefaultDecl, DoctypeDecl,
+ Document, ETag, Element, Elementdecl, EmptyElemTag, EncName, EncodingDecl, EntityDecl,
+ EntityDef, EntityRef, EntityValue, EnumeratedType, Enumeration, Eq, ExtParsedEnt, ExtSubset,
+ ExtSubsetDecl, ExtSubsetDeclaration, ExternalID, GEDecl, Ignore, IgnoreSect,
+ IgnoreSectContents, IncludeSect, IntSubset, IntSubsetDeclaration, LiteralData, LocalPart,
+ MarkupDecl, Misc, Mixed, NCName, NDataDecl, Name, NameChar, NameStartChar, Names, Nmtoken,
+ Nmtokens, NotationDecl, NotationDeclID, NotationType, Occurence, PEDecl, PEDef, PEReference,
+ PITarget, Prefix, PrefixedAttName, PrefixedName, Prolog, PubidChar, PubidLiteral, PublicID,
+ QName, Reference, SDDecl, STag, Seq, StringType, SystemLiteral, TextDecl, TokenizedType,
+ UnprefixedName, VersionInfo, VersionNum, XMLDecl, PI, S,
+};
+
+/// Parses a value of type `T` from the front of a string slice.
+///
+/// Implemented once per grammar production; `parse` is an associated function (it takes no `self`).
+pub trait Parser<'s, T> {
+ /// Attempts to parse a `T` from the start of `input`, returning a nom `IResult` that pairs
+ /// the unconsumed remainder with the parsed value on success.
+ fn parse(input: &'s str) -> IResult<&str, T>;
+}
+
+/// [1] NSAttName ::= PrefixedAttName | DefaultAttName
+impl<'s> Parser<'s, NSAttName<'s>> for NSAttName<'s> {
+    fn parse(input: &'s str) -> IResult<&str, NSAttName<'s>> {
+        // `alt` tries the branches in order: the `xmlns:`-prefixed form, then bare `xmlns`.
+        alt((
+            map(PrefixedAttName::parse, NSAttName::PrefixedAttName),
+            value(NSAttName::DefaultAttName, DefaultAttName::parse),
+        ))(input)
+    }
+}
+
+/// [2] PrefixedAttName ::= 'xmlns:' NCName
+impl<'s> Parser<'s, PrefixedAttName<'s>> for PrefixedAttName<'s> {
+    fn parse(input: &'s str) -> IResult<&str, PrefixedAttName<'s>> {
+        // The literal `xmlns:` marker is matched and dropped; only the NCName is kept.
+        let (rest, nc_name) = preceded(tag("xmlns:"), NCName::parse)(input)?;
+        Ok((rest, PrefixedAttName(nc_name)))
+    }
+}
+
+/// [3] DefaultAttName ::= 'xmlns'
+impl Parser<'_, DefaultAttName> for DefaultAttName {
+ fn parse(input: &str) -> IResult<&str, DefaultAttName> {
+ // Only the literal is matched; the returned unit value carries no data.
+ value(DefaultAttName, tag("xmlns"))(input)
+ }
+}
+
+/// [4] NCName ::= Name - (Char* ':' Char*)
+///
+/// A `Name` that contains no colon: one `NameStartChar` followed by any number of `NameChar`s,
+/// none of which may be `:`.
+impl<'s> Parser<'s, NCName<'s>> for NCName<'s> {
+    fn parse(input: &'s str) -> IResult<&str, NCName<'s>> {
+        map(
+            recognize(pair(
+                // First character: any name-start character except ':'.
+                preceded(not(char(':')), NameStartChar::parse),
+                // Remaining characters: stop (without failing) at the first ':' or at the first
+                // character that is not a name character. The previous `many_till(.., peek(':'))`
+                // only succeeded when a ':' actually followed, so names such as `foo>` or
+                // `foo ` were rejected outright.
+                many0(preceded(not(char(':')), NameChar::parse)),
+            )),
+            NCName,
+        )(input)
+    }
+}
+
+/// [7] QName ::= PrefixedName | UnprefixedName
+impl<'s> Parser<'s, QName<'s>> for QName<'s> {
+    fn parse(input: &'s str) -> IResult<&str, QName<'s>> {
+        // `alt` tries branches in order, so the `prefix:local` form is attempted first.
+        alt((
+            map(PrefixedName::parse, QName::PrefixedName),
+            map(UnprefixedName::parse, QName::UnprefixedName),
+        ))(input)
+    }
+}
+
+/// [8] PrefixedName ::= Prefix ':' LocalPart
+impl<'s> Parser<'s, PrefixedName<'s>> for PrefixedName<'s> {
+    fn parse(input: &'s str) -> IResult<&str, PrefixedName<'s>> {
+        // Parse the three pieces in sequence; the separating colon is matched and discarded.
+        let (rest, prefix) = Prefix::parse(input)?;
+        let (rest, _) = char(':')(rest)?;
+        let (rest, local_part) = LocalPart::parse(rest)?;
+        Ok((rest, PrefixedName { prefix, local_part }))
+    }
+}
+
+/// [9] UnprefixedName ::= LocalPart
+impl<'s> Parser<'s, UnprefixedName<'s>> for UnprefixedName<'s> {
+    fn parse(input: &'s str) -> IResult<&str, UnprefixedName<'s>> {
+        // A thin wrapper: whatever `LocalPart` accepts is an unprefixed name.
+        let (rest, local_part) = LocalPart::parse(input)?;
+        Ok((rest, UnprefixedName(local_part)))
+    }
+}
+
+/// [10] Prefix ::= NCName
+impl<'s> Parser<'s, Prefix<'s>> for Prefix<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Prefix<'s>> {
+        // The tuple-struct constructor doubles as the mapping function.
+        map(NCName::parse, Prefix)(input)
+    }
+}
+
+/// [11] LocalPart ::= NCName
+impl<'s> Parser<'s, LocalPart<'s>> for LocalPart<'s> {
+    fn parse(input: &'s str) -> IResult<&str, LocalPart<'s>> {
+        // Delegate to NCName and wrap the result.
+        let (rest, nc_name) = NCName::parse(input)?;
+        Ok((rest, LocalPart(nc_name)))
+    }
+}
+
+// xml spec
+
+/// [1] document ::= prolog element Misc*
+impl<'s> Parser<'s, Document<'s>> for Document<'s> {
+ fn parse(input: &'s str) -> IResult<&str, Document<'s>> {
+ // The sequence is returned as-is: (prolog, root element, trailing Misc items).
+ // NOTE(review): this relies on `Document` being declared as the matching tuple type —
+ // confirm against its definition in `mod.rs`.
+ tuple((Prolog::parse, Element::parse, many0(Misc::parse)))(input)
+ }
+}
+
+/// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
+impl Parser<'_, Char> for Char {
+    fn parse(input: &str) -> IResult<&str, Char> {
+        // Accept exactly one character drawn from the ranges listed in the production.
+        map(
+            satisfy(|c| {
+                matches!(
+                    c,
+                    '\u{9}'
+                        | '\u{A}'
+                        | '\u{D}'
+                        | '\u{20}'..='\u{D7FF}'
+                        | '\u{E000}'..='\u{FFFD}'
+                        | '\u{10000}'..='\u{10FFFF}'
+                )
+            }),
+            Char,
+        )(input)
+    }
+}
+
+/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
+impl<'s> Parser<'s, S<'s>> for S<'s> {
+    fn parse(input: &'s str) -> IResult<&str, S<'s>> {
+        // `is_a` takes the longest non-empty run made up of the four whitespace characters.
+        let (rest, whitespace) = is_a("\u{20}\u{9}\u{D}\u{A}")(input)?;
+        Ok((rest, S(whitespace)))
+    }
+}
+
+/// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+impl Parser<'_, NameStartChar> for NameStartChar {
+    fn parse(input: &str) -> IResult<&str, NameStartChar> {
+        // One character from the name-start ranges, in the same order as the production.
+        map(
+            satisfy(|c| {
+                matches!(
+                    c,
+                    ':' | 'A'..='Z'
+                        | '_'
+                        | 'a'..='z'
+                        | '\u{C0}'..='\u{D6}'
+                        | '\u{D8}'..='\u{F6}'
+                        | '\u{F8}'..='\u{2FF}'
+                        | '\u{370}'..='\u{37D}'
+                        | '\u{37F}'..='\u{1FFF}'
+                        | '\u{200C}'..='\u{200D}'
+                        | '\u{2070}'..='\u{218F}'
+                        | '\u{2C00}'..='\u{2FEF}'
+                        | '\u{3001}'..='\u{D7FF}'
+                        | '\u{F900}'..='\u{FDCF}'
+                        | '\u{FDF0}'..='\u{FFFD}'
+                        | '\u{10000}'..='\u{EFFFF}'
+                )
+            }),
+            NameStartChar,
+        )(input)
+    }
+}
+
+/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
+impl Parser<'_, NameChar> for NameChar {
+    fn parse(input: &str) -> IResult<&str, NameChar> {
+        map(
+            alt((
+                // Every name-start character is also a name character; unwrap it to a bare char.
+                map(NameStartChar::parse, |NameStartChar(c)| c),
+                // The additional characters allowed after the first position.
+                satisfy(|c| {
+                    matches!(
+                        c,
+                        '-' | '.'
+                            | '0'..='9'
+                            | '\u{B7}'
+                            | '\u{0300}'..='\u{036F}'
+                            | '\u{203F}'..='\u{2040}'
+                    )
+                }),
+            )),
+            NameChar,
+        )(input)
+    }
+}
+
+/// [5] Name ::= NameStartChar (NameChar)*
+impl<'s> Parser<'s, Name<'s>> for Name<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Name<'s>> {
+        // `recognize` yields the matched slice rather than the individual characters.
+        let (rest, name) = recognize(pair(NameStartChar::parse, many0(NameChar::parse)))(input)?;
+        Ok((rest, Name(name)))
+    }
+}
+
+/// [6] Names ::= Name (#x20 Name)*
+impl<'s> Parser<'s, Names<'s>> for Names<'s> {
+    // TODO: fix
+    fn parse(input: &'s str) -> IResult<&str, Names<'s>> {
+        // The whole space-separated list is kept as a single slice.
+        map(
+            recognize(pair(Name::parse, many0(pair(char('\u{20}'), Name::parse)))),
+            Names,
+        )(input)
+    }
+}
+
+/// [7] Nmtoken ::= (NameChar)+
+impl<'s> Parser<'s, Nmtoken<'s>> for Nmtoken<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Nmtoken<'s>> {
+        // At least one name character; the matched slice becomes the token.
+        map(recognize(many1(NameChar::parse)), Nmtoken)(input)
+    }
+}
+
+/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
+impl<'s> Parser<'s, Nmtokens<'s>> for Nmtokens<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Nmtokens<'s>> {
+        // The whole space-separated list is kept as a single slice.
+        let (rest, nmtokens) = recognize(pair(
+            Nmtoken::parse,
+            many0(pair(char('\u{20}'), Nmtoken::parse)),
+        ))(input)?;
+        Ok((rest, Nmtokens(nmtokens)))
+    }
+}
+
+/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
+///                  |  "'" ([^%&'] | PEReference | Reference)* "'"
+///
+/// The value is returned as a list of pieces: literal text runs, parameter-entity references and
+/// general/character references, in document order.
+impl<'s> Parser<'s, EntityValue<'s>> for EntityValue<'s> {
+    fn parse(input: &'s str) -> IResult<&str, EntityValue<'s>> {
+        map(
+            alt((
+                delimited(
+                    char('"'),
+                    many0(alt((
+                        // A literal run must be non-empty (`many1`). The previous
+                        // `many_till(.., peek(..))` form succeeded with zero characters when the
+                        // next character was `%`, `&` or the closing quote; `many0` treats a
+                        // non-consuming success as an error, so every value was rejected.
+                        map(recognize(many1(none_of("%&\""))), LiteralData::String),
+                        map(PEReference::parse, LiteralData::PEReference),
+                        map(Reference::parse, LiteralData::Reference),
+                    ))),
+                    char('"'),
+                ),
+                delimited(
+                    char('\''),
+                    many0(alt((
+                        // Same as above, with the single quote as the terminating character.
+                        map(recognize(many1(none_of("%&'"))), LiteralData::String),
+                        map(PEReference::parse, LiteralData::PEReference),
+                        map(Reference::parse, LiteralData::Reference),
+                    ))),
+                    char('\''),
+                ),
+            )),
+            EntityValue,
+        )(input)
+    }
+}
+
+/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
+///                |  "'" ([^<&'] | Reference)* "'"
+///
+/// The value is returned as a list of pieces: literal text runs and references, in document order.
+impl<'s> Parser<'s, AttValue<'s>> for AttValue<'s> {
+    fn parse(input: &'s str) -> IResult<&str, AttValue<'s>> {
+        map(
+            alt((
+                delimited(
+                    char('"'),
+                    many0(alt((
+                        // The excluded set is `<`, `&` and the quote, as the production states
+                        // (it previously excluded `%` instead of `<`). The run must be non-empty
+                        // (`many1`): a zero-length match makes `many0` fail, which rejected every
+                        // attribute value.
+                        map(recognize(many1(none_of("<&\""))), LiteralData::String),
+                        map(Reference::parse, LiteralData::Reference),
+                    ))),
+                    char('"'),
+                ),
+                delimited(
+                    char('\''),
+                    many0(alt((
+                        // Same as above, with the single quote as the terminating character.
+                        map(recognize(many1(none_of("<&'"))), LiteralData::String),
+                        map(Reference::parse, LiteralData::Reference),
+                    ))),
+                    char('\''),
+                ),
+            )),
+            AttValue,
+        )(input)
+    }
+}
+
+/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
+impl<'s> Parser<'s, SystemLiteral<'s>> for SystemLiteral<'s> {
+    fn parse(input: &'s str) -> IResult<&str, SystemLiteral<'s>> {
+        // Either quote style is accepted; the content is everything up to the matching quote.
+        let (rest, system_literal) = alt((
+            delimited(char('"'), recognize(many0(none_of("\""))), char('"')),
+            delimited(char('\''), recognize(many0(none_of("'"))), char('\'')),
+        ))(input)?;
+        Ok((rest, SystemLiteral(system_literal)))
+    }
+}
+
+/// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
+impl<'s> Parser<'s, PubidLiteral<'s>> for PubidLiteral<'s> {
+    fn parse(input: &'s str) -> IResult<&str, PubidLiteral<'s>> {
+        map(
+            alt((
+                delimited(char('"'), recognize(many0(PubidChar::parse)), char('"')),
+                delimited(
+                    char('\''),
+                    // `PubidChar - "'"`: check that the next character is not the closing quote,
+                    // then parse a PubidChar from the same position. The previous
+                    // `recognize(not(..)).and_then(..)` handed the empty slice produced by `not`
+                    // to a streaming parser, so this branch always reported Incomplete.
+                    recognize(many0(preceded(not(char('\'')), PubidChar::parse))),
+                    char('\''),
+                ),
+            )),
+            PubidLiteral,
+        )(input)
+    }
+}
+
+/// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+impl Parser<'_, PubidChar> for PubidChar {
+    fn parse(input: &'_ str) -> IResult<&str, PubidChar> {
+        map(
+            satisfy(|c| {
+                matches!(
+                    c,
+                    '\u{20}' | '\u{D}' | '\u{A}' | 'a'..='z' | 'A'..='Z' | '0'..='9'
+                        // The punctuation class from the production, which was previously
+                        // missing, so identifiers such as "-//W3C//DTD XHTML 1.0//EN" failed.
+                        | '-' | '\'' | '(' | ')' | '+' | ',' | '.' | '/' | ':' | '=' | '?' | ';'
+                        | '!' | '*' | '#' | '@' | '$' | '_' | '%'
+                )
+            }),
+            PubidChar,
+        )(input)
+    }
+}
+
+/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
+impl<'s> Parser<'s, CharData<'s>> for CharData<'s> {
+    fn parse(input: &'s str) -> IResult<&str, CharData<'s>> {
+        // Take characters until the next one is `<` or `&`, or the input continues with `]]>`.
+        // The terminator is only peeked at, so it stays in the remaining input.
+        let (rest, char_data) = recognize(many_till(
+            none_of("<&"),
+            peek(alt((recognize(one_of("<&")), tag("]]>")))),
+        ))(input)?;
+        Ok((rest, CharData(char_data)))
+    }
+}
+
+/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
+impl<'s> Parser<'s, Comment<'s>> for Comment<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Comment<'s>> {
+        // The body runs up to the first `--`, which must then be the start of the closing `-->`.
+        map(
+            delimited(
+                tag("<!--"),
+                recognize(many_till(Char::parse, peek(tag("--")))),
+                tag("-->"),
+            ),
+            Comment,
+        )(input)
+    }
+}
+
+/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
+impl<'s> Parser<'s, PI<'s>> for PI<'s> {
+ fn parse(input: &'s str) -> IResult<&str, PI<'s>> {
+ map(
+ delimited(
+ tag("<?"),
+ pair(
+ PITarget::parse,
+ // The optional instruction is captured as one slice that still includes its
+ // leading whitespace.
+ opt(recognize(pair(
+ S::parse,
+ // Consume characters up to, but not including, the closing `?>`.
+ many_till(Char::parse, peek(tag("?>"))),
+ ))),
+ ),
+ tag("?>"),
+ ),
+ |(target, instruction)| PI {
+ target,
+ instruction,
+ },
+ )(input)
+ }
+}
+
+/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
+///
+/// Any `Name` is accepted except the reserved target `xml` in any letter case, which yields a
+/// recoverable `ErrorKind::Tag` error positioned at the start of the name.
+impl<'s> Parser<'s, PITarget<'s>> for PITarget<'s> {
+    fn parse(input: &'s str) -> IResult<&str, PITarget<'s>> {
+        let (rest, name) = Name::parse(input)?;
+        // Compare in place instead of allocating a lowercased copy of the name.
+        if name.0.eq_ignore_ascii_case("xml") {
+            Err(Err::Error(Error {
+                input,
+                // TODO: check if better error to return
+                code: ErrorKind::Tag,
+            }))
+        } else {
+            Ok((rest, PITarget(name)))
+        }
+    }
+}
+
+/// [18] CDSect ::= CDStart CData CDEnd
+impl<'s> Parser<'s, CDSect<'s>> for CDSect<'s> {
+    fn parse(input: &'s str) -> IResult<&str, CDSect<'s>> {
+        // Match and discard the `<![CDATA[` / `]]>` delimiters around the data. Previously only
+        // `CData` was parsed, so the opening marker ended up inside the data and the closing
+        // marker was left unconsumed.
+        map(
+            delimited(CDStart::parse, CData::parse, CDEnd::parse),
+            CDSect,
+        )(input)
+    }
+}
+
+/// [19] CDStart ::= '<![CDATA['
+impl Parser<'_, CDStart> for CDStart {
+ fn parse(input: &'_ str) -> IResult<&str, CDStart> {
+ // Matches the literal opening marker and returns the unit marker value.
+ value(CDStart, tag("<![CDATA["))(input)
+ }
+}
+
+/// [20] CData ::= (Char* - (Char* ']]>' Char*))
+impl<'s> Parser<'s, CData<'s>> for CData<'s> {
+    fn parse(input: &'s str) -> IResult<&str, CData<'s>> {
+        // Everything up to, but not including, the first `]]>`; the terminator is only peeked at.
+        let (rest, c_data) = recognize(many_till(Char::parse, peek(tag("]]>"))))(input)?;
+        Ok((rest, CData(c_data)))
+    }
+}
+
+/// [21] CDEnd ::= ']]>'
+impl Parser<'_, CDEnd> for CDEnd {
+ fn parse(input: &'_ str) -> IResult<&str, CDEnd> {
+ // Matches the literal closing marker and returns the unit marker value.
+ value(CDEnd, tag("]]>"))(input)
+ }
+}
+
+/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
+impl<'s> Parser<'s, Prolog<'s>> for Prolog<'s> {
+ fn parse(input: &'s str) -> IResult<&str, Prolog<'s>> {
+ // Returned as the raw tuple: (optional XMLDecl, Misc items, optional (doctypedecl, Misc items)).
+ // NOTE(review): this relies on `Prolog` being declared as that tuple type — confirm in `mod.rs`.
+ tuple((
+ opt(XMLDecl::parse),
+ many0(Misc::parse),
+ opt(tuple((DoctypeDecl::parse, many0(Misc::parse)))),
+ ))(input)
+ }
+}
+
+/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
+impl<'s> Parser<'s, XMLDecl<'s>> for XMLDecl<'s> {
+ fn parse(input: &'s str) -> IResult<&str, XMLDecl<'s>> {
+ map(
+ delimited(
+ tag("<?xml"),
+ // Version is mandatory; encoding and standalone declarations are optional.
+ tuple((
+ VersionInfo::parse,
+ opt(EncodingDecl::parse),
+ opt(SDDecl::parse),
+ )),
+ // Optional trailing whitespace is consumed together with the closing `?>` and discarded.
+ pair(opt(S::parse), tag("?>")),
+ ),
+ |(version_info, encoding_decl, sd_decl)| XMLDecl {
+ version_info,
+ encoding_decl,
+ sd_decl,
+ },
+ )(input)
+ }
+}
+
+/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
+impl Parser<'_, VersionInfo> for VersionInfo {
+    fn parse(input: &'_ str) -> IResult<&str, VersionInfo> {
+        // Skip ` version=` and read the quoted number; either quote style is accepted.
+        let (rest, version_num) = preceded(
+            tuple((S::parse, tag("version"), Eq::parse)),
+            alt((
+                delimited(char('\''), VersionNum::parse, char('\'')),
+                delimited(char('"'), VersionNum::parse, char('"')),
+            )),
+        )(input)?;
+        Ok((rest, VersionInfo(version_num)))
+    }
+}
+
+/// [25] Eq ::= S? '=' S?
+impl Parser<'_, Eq> for Eq {
+ fn parse(input: &'_ str) -> IResult<&str, Eq> {
+ // The surrounding whitespace is matched but not kept; only the unit marker is returned.
+ value(
+ Eq,
+ recognize(tuple((opt(S::parse), char('='), opt(S::parse)))),
+ )(input)
+ }
+}
+
+/// [26] VersionNum ::= '1.' [0-9]+
+impl Parser<'_, VersionNum> for VersionNum {
+ fn parse(input: &'_ str) -> IResult<&str, VersionNum> {
+ // NOTE(review): narrower than the production above — exactly one digit is read after `1.`
+ // and only `0` and `1` are accepted, so versions such as `1.2` are rejected here.
+ preceded(
+ tag("1."),
+ alt((
+ value(VersionNum::One, char('0')),
+ value(VersionNum::OneDotOne, char('1')),
+ )),
+ )(input)
+ }
+}
+
+/// [27] Misc ::= Comment | PI | S
+impl<'s> Parser<'s, Misc<'s>> for Misc<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Misc<'s>> {
+        alt((
+            map(Comment::parse, Misc::Comment),
+            map(PI::parse, Misc::PI),
+            // Whitespace content is not retained; only its presence is recorded.
+            value(Misc::S, S::parse),
+        ))(input)
+    }
+}
+
+/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
+/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
+impl<'s> Parser<'s, DoctypeDecl<'s>> for DoctypeDecl<'s> {
+ fn parse(input: &'s str) -> IResult<&str, DoctypeDecl<'s>> {
+ map(
+ delimited(
+ pair(tag("<!DOCTYPE"), S::parse),
+ tuple((
+ // The document type name is parsed as a QName (the form in production [16]).
+ QName::parse,
+ // Optional external identifier, which must be preceded by whitespace.
+ opt(preceded(S::parse, ExternalID::parse)),
+ // Optional whitespace, then an optional bracketed internal subset followed by
+ // optional whitespace; only the subset itself is kept.
+ preceded(
+ opt(S::parse),
+ opt(terminated(
+ delimited(tag("["), IntSubset::parse, tag("]")),
+ opt(S::parse),
+ )),
+ ),
+ )),
+ tag(">"),
+ ),
+ |(name, external_id, int_subset)| DoctypeDecl {
+ name,
+ external_id,
+ int_subset,
+ },
+ )(input)
+ }
+}
+
+/// [28a] DeclSep ::= PEReference | S
+impl<'s> Parser<'s, DeclSep<'s>> for DeclSep<'s> {
+    fn parse(input: &'s str) -> IResult<&str, DeclSep<'s>> {
+        alt((
+            map(PEReference::parse, DeclSep::PEReference),
+            // Whitespace content is not retained; only its presence is recorded.
+            value(DeclSep::S, S::parse),
+        ))(input)
+    }
+}
+
+/// [28b] intSubset ::= (markupdecl | DeclSep)*
+impl<'s> Parser<'s, IntSubset<'s>> for IntSubset<'s> {
+    fn parse(input: &'s str) -> IResult<&str, IntSubset<'s>> {
+        // Zero or more declarations, each tagged with the kind that matched.
+        many0(alt((
+            map(MarkupDecl::parse, IntSubsetDeclaration::MarkupDecl),
+            map(DeclSep::parse, IntSubsetDeclaration::DeclSep),
+        )))(input)
+    }
+}
+
+/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
+impl<'s> Parser<'s, MarkupDecl<'s>> for MarkupDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, MarkupDecl<'s>> {
+        // Alternatives are tried in the order the production lists them.
+        alt((
+            map(Elementdecl::parse, MarkupDecl::Elementdecl),
+            map(AttlistDecl::parse, MarkupDecl::AttlistDecl),
+            map(EntityDecl::parse, MarkupDecl::EntityDecl),
+            map(NotationDecl::parse, MarkupDecl::NotationDecl),
+            map(PI::parse, MarkupDecl::PI),
+            map(Comment::parse, MarkupDecl::Comment),
+        ))(input)
+    }
+}
+
+/// [30] extSubset ::= TextDecl? extSubsetDecl
+impl<'s> Parser<'s, ExtSubset<'s>> for ExtSubset<'s> {
+ fn parse(input: &'s str) -> IResult<&str, ExtSubset<'s>> {
+ map(
+ // An optional text declaration followed by the external subset declarations.
+ pair(opt(TextDecl::parse), ExtSubsetDecl::parse),
+ |(text_decl, ext_subset_decl)| ExtSubset {
+ text_decl,
+ ext_subset_decl,
+ },
+ )(input)
+ }
+}
+
+/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
+impl<'s> Parser<'s, ExtSubsetDecl<'s>> for ExtSubsetDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, ExtSubsetDecl<'s>> {
+        // Zero or more declarations, each tagged with the kind that matched.
+        many0(alt((
+            map(MarkupDecl::parse, ExtSubsetDeclaration::MarkupDecl),
+            map(ConditionalSect::parse, ExtSubsetDeclaration::ConditionalSect),
+            map(DeclSep::parse, ExtSubsetDeclaration::DeclSep),
+        )))(input)
+    }
+}
+
+/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
+///
+/// Yields `true` for `yes` and `false` for `no`; the value may be wrapped in
+/// single or double quotes, but the opening and closing quote must match.
+impl Parser<'_, SDDecl> for SDDecl {
+    fn parse(input: &'_ str) -> IResult<&str, SDDecl> {
+        // The literal `yes` / `no`, shared by both quoting styles.
+        fn yes_no(input: &str) -> IResult<&str, bool> {
+            alt((value(true, tag("yes")), value(false, tag("no"))))(input)
+        }
+
+        let quoted = alt((
+            delimited(char('\''), yes_no, char('\'')),
+            delimited(char('"'), yes_no, char('"')),
+        ));
+        preceded(tuple((S::parse, tag("standalone"), Eq::parse)), quoted)(input)
+    }
+}
+
+// (Productions 33 through 38 have been removed.)
+
+/// [39] element ::= EmptyElemTag | STag content ETag
+///
+/// A self-closing tag is attempted first; if that fails, a start tag, its
+/// content and an end tag are parsed in sequence.
+impl<'s> Parser<'s, Element<'s>> for Element<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Element<'s>> {
+        let empty = map(EmptyElemTag::parse, Element::Empty);
+        let with_content = map(
+            tuple((STag::parse, Content::parse, ETag::parse)),
+            |(open, body, close)| Element::NotEmpty(open, body, close),
+        );
+        alt((empty, with_content))(input)
+    }
+}
+
+/// [12] STag ::= '<' QName (S Attribute)* S? '>'
+/// [40] STag ::= '<' Name (S Attribute)* S? '>'
+///
+/// Implements the namespace-aware form [12]: the element name is a `QName`
+/// and each attribute must be preceded by whitespace.
+impl<'s> Parser<'s, STag<'s>> for STag<'s> {
+    fn parse(input: &'s str) -> IResult<&str, STag<'s>> {
+        let (rest, (name, attributes)) = delimited(
+            tag("<"),
+            pair(QName::parse, many0(preceded(S::parse, Attribute::parse))),
+            pair(opt(S::parse), tag(">")),
+        )(input)?;
+        Ok((rest, STag { name, attributes }))
+    }
+}
+
+/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
+///
+/// Namespace declarations are attempted before ordinary attributes, so a
+/// name accepted by `NSAttName` is reported as
+/// `Attribute::NamespaceDeclaration` rather than `Attribute::Attribute`.
+impl<'s> Parser<'s, Attribute<'s>> for Attribute<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Attribute<'s>> {
+        let namespace_declaration = map(
+            separated_pair(NSAttName::parse, Eq::parse, AttValue::parse),
+            |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value },
+        );
+        let plain_attribute = map(
+            separated_pair(QName::parse, Eq::parse, AttValue::parse),
+            |(name, value)| Attribute::Attribute { name, value },
+        );
+        alt((namespace_declaration, plain_attribute))(input)
+    }
+}
+// Superseded non-namespace-aware production, kept for reference only:
+// [41] Attribute ::= Name Eq AttValue
+// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
+// pub fn attribute(input: &str) -> IResult<&str, Attribute> {
+//     separated_pair(name, eq, att_value)(input)
+// }
+
+/// [13] ETag ::= '</' QName S? '>'
+/// [42] ETag ::= '</' Name S? '>'
+///
+/// Implements the namespace-aware form [13]; optional whitespace is allowed
+/// between the name and the closing `>`.
+impl<'s> Parser<'s, ETag<'s>> for ETag<'s> {
+    fn parse(input: &'s str) -> IResult<&str, ETag<'s>> {
+        let (rest, name) =
+            delimited(tag("</"), QName::parse, pair(opt(S::parse), tag(">")))(input)?;
+        Ok((rest, ETag { name }))
+    }
+}
+
+/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
+///
+/// Produces the optional leading character data followed by a list of
+/// `(item, optional trailing character data)` pairs.
+impl<'s> Parser<'s, Content<'s>> for Content<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Content<'s>> {
+        let item = alt((
+            map(Element::parse, ContentItem::Element),
+            map(Reference::parse, ContentItem::Reference),
+            map(CDSect::parse, ContentItem::CDSect),
+            map(PI::parse, ContentItem::PI),
+            map(Comment::parse, ContentItem::Comment),
+        ));
+        let (rest, (char_data, content)) = pair(
+            opt(CharData::parse),
+            many0(pair(item, opt(CharData::parse))),
+        )(input)?;
+        Ok((rest, Content { char_data, content }))
+    }
+}
+
+/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
+/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
+///
+/// Implements the namespace-aware form [14]. Only the syntax is parsed here;
+/// the attribute-uniqueness constraint is not checked by this parser.
+impl<'s> Parser<'s, EmptyElemTag<'s>> for EmptyElemTag<'s> {
+    fn parse(input: &'s str) -> IResult<&str, EmptyElemTag<'s>> {
+        let (rest, (name, attributes)) = delimited(
+            tag("<"),
+            pair(QName::parse, many0(preceded(S::parse, Attribute::parse))),
+            pair(opt(S::parse), tag("/>")),
+        )(input)?;
+        Ok((rest, EmptyElemTag { name, attributes }))
+    }
+}
+
+/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
+/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
+///
+/// Implements the namespace-aware form [17] and returns the declared element
+/// name together with its content specification.
+impl<'s> Parser<'s, Elementdecl<'s>> for Elementdecl<'s> {
+    // The return type spells out `<'s>` like every other impl in this file;
+    // it is the same type the elided form resolved to.
+    fn parse(input: &'s str) -> IResult<&str, Elementdecl<'s>> {
+        map(
+            delimited(
+                pair(tag("<!ELEMENT"), S::parse),
+                separated_pair(QName::parse, S::parse, Contentspec::parse),
+                pair(opt(S::parse), tag(">")),
+            ),
+            |(name, contentspec)| Elementdecl { name, contentspec },
+        )(input)
+    }
+}
+
+/// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
+///
+/// The two keywords are checked first, then mixed content, then a children
+/// content model.
+impl<'s> Parser<'s, Contentspec<'s>> for Contentspec<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Contentspec<'s>> {
+        let empty = value(Contentspec::Empty, tag("EMPTY"));
+        let any = value(Contentspec::Any, tag("ANY"));
+        let mixed = map(Mixed::parse, Contentspec::Mixed);
+        let children = map(Children::parse, Contentspec::Children);
+        alt((empty, any, mixed, children))(input)
+    }
+}
+
+/// Occurence ::= ('?' | '*' | '+')?
+///
+/// Helper production shared by `children` and `cp`. The indicator is
+/// optional: when none of the three symbols is present nothing is consumed
+/// and `Occurence::Once` is returned.
+impl Parser<'_, Occurence> for Occurence {
+    fn parse(input: &'_ str) -> IResult<&str, Occurence> {
+        let indicator = opt(alt((tag("?"), tag("*"), tag("+"))));
+        map(indicator, |symbol: Option<&str>| match symbol {
+            Some("+") => Occurence::Many1,
+            Some("*") => Occurence::Many0,
+            Some("?") => Occurence::Optional,
+            // No indicator present.
+            _ => Occurence::Once,
+        })(input)
+    }
+}
+
+/// [47] children ::= (choice | seq) ('?' | '*' | '+')?
+///
+/// A choice group is attempted before a sequence group; the optional
+/// occurrence indicator that follows is parsed by `Occurence`.
+impl<'s> Parser<'s, Children<'s>> for Children<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Children<'s>> {
+        let group = alt((
+            map(Choice::parse, ChildrenKind::Choice),
+            map(Seq::parse, ChildrenKind::Seq),
+        ));
+        let (rest, (kind, occurence)) = pair(group, Occurence::parse)(input)?;
+        Ok((rest, Children { kind, occurence }))
+    }
+}
+
+/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')?
+/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
+///
+/// Implements the namespace-aware form [18]: a content particle is a
+/// qualified name or a nested choice/sequence group, followed by an optional
+/// occurrence indicator.
+impl<'s> Parser<'s, Cp<'s>> for Cp<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Cp<'s>> {
+        let particle = alt((
+            map(QName::parse, CpKind::Name),
+            map(Choice::parse, CpKind::Choice),
+            map(Seq::parse, CpKind::Seq),
+        ));
+        let (rest, (kind, occurence)) = pair(particle, Occurence::parse)(input)?;
+        Ok((rest, Cp { kind, occurence }))
+    }
+}
+
+/// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
+///
+/// A choice needs at least two particles separated by `|`. The particles are
+/// returned in source order.
+impl<'s> Parser<'s, Choice<'s>> for Choice<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Choice<'s>> {
+        map(
+            delimited(
+                pair(tag("("), opt(S::parse)),
+                pair(
+                    Cp::parse,
+                    many1(preceded(
+                        tuple((opt(S::parse), tag("|"), opt(S::parse))),
+                        Cp::parse,
+                    )),
+                ),
+                pair(opt(S::parse), tag(")")),
+            ),
+            // Move the parsed particles into the result instead of cloning
+            // them through `[Vec<_>]::concat`.
+            |(head, tail)| Choice(std::iter::once(head).chain(tail).collect()),
+        )(input)
+    }
+}
+
+/// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
+///
+/// A sequence holds one or more particles separated by `,`. The particles
+/// are returned in source order.
+impl<'s> Parser<'s, Seq<'s>> for Seq<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Seq<'s>> {
+        map(
+            delimited(
+                pair(tag("("), opt(S::parse)),
+                pair(
+                    Cp::parse,
+                    many0(preceded(
+                        tuple((opt(S::parse), tag(","), opt(S::parse))),
+                        Cp::parse,
+                    )),
+                ),
+                pair(opt(S::parse), tag(")")),
+            ),
+            // Move the parsed particles into the result instead of cloning
+            // them through `[Vec<_>]::concat`.
+            |(head, tail)| Seq(std::iter::once(head).chain(tail).collect()),
+        )(input)
+    }
+}
+
+/// [19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'
+/// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
+///
+/// Implements the namespace-aware form [19]. The result lists the element
+/// names allowed alongside character data; the `#PCDATA`-only form yields an
+/// empty list.
+impl<'s> Parser<'s, Mixed<'s>> for Mixed<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Mixed<'s>> {
+        alt((
+            map(
+                delimited(
+                    // Whitespace after `(` is optional (`S?`) in the grammar,
+                    // so `(#PCDATA|a)*` must be accepted as well.
+                    tuple((tag("("), opt(S::parse), tag("#PCDATA"))),
+                    many0(preceded(
+                        tuple((opt(S::parse), tag("|"), opt(S::parse))),
+                        QName::parse,
+                    )),
+                    pair(opt(S::parse), tag(")*")),
+                ),
+                |names| Mixed(names),
+            ),
+            value(
+                Mixed(Vec::new()),
+                tuple((
+                    tag("("),
+                    opt(S::parse),
+                    tag("#PCDATA"),
+                    opt(S::parse),
+                    tag(")"),
+                )),
+            ),
+        ))(input)
+    }
+}
+
+/// [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'
+/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
+///
+/// Implements the namespace-aware form [20] and returns the element type
+/// with its (possibly empty) list of attribute definitions.
+impl<'s> Parser<'s, AttlistDecl<'s>> for AttlistDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, AttlistDecl<'s>> {
+        let (rest, (element_type, att_defs)) = delimited(
+            pair(tag("<!ATTLIST"), S::parse),
+            pair(QName::parse, many0(AttDef::parse)),
+            pair(opt(S::parse), tag(">")),
+        )(input)?;
+        Ok((
+            rest,
+            AttlistDecl {
+                element_type,
+                att_defs,
+            },
+        ))
+    }
+}
+
+/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
+/// [53] AttDef ::= S Name S AttType S DefaultDecl
+///
+/// Implements the namespace-aware form [21] and returns the attribute name,
+/// its declared type and its default declaration.
+impl<'s> Parser<'s, AttDef<'s>> for AttDef<'s> {
+    fn parse(input: &'s str) -> IResult<&str, AttDef<'s>> {
+        map(
+            tuple((
+                preceded(
+                    S::parse,
+                    alt((
+                        // `alt` is an ordered choice, and a QName parser that
+                        // follows the grammar presumably also matches `xmlns`
+                        // and `xmlns:prefix` (not visible here), which would
+                        // make a later NSAttName branch unreachable. Try the
+                        // namespace form first, as `Attribute` does. The
+                        // lookahead for the mandatory whitespace keeps
+                        // ordinary names that merely start with `xmlns` on
+                        // the QName path.
+                        map(
+                            terminated(NSAttName::parse, peek(S::parse)),
+                            AttDefName::NSAttName,
+                        ),
+                        map(QName::parse, AttDefName::QName),
+                    )),
+                ),
+                preceded(S::parse, AttType::parse),
+                preceded(S::parse, DefaultDecl::parse),
+            )),
+            |(name, att_type, default_decl)| AttDef {
+                name,
+                att_type,
+                default_decl,
+            },
+        )(input)
+    }
+}
+
+/// [54] AttType ::= StringType | TokenizedType | EnumeratedType
+///
+/// The alternatives are tried in grammar order; the string type carries no
+/// data and is reported as the unit-like `AttType::StringType` variant.
+impl<'s> Parser<'s, AttType<'s>> for AttType<'s> {
+    fn parse(input: &'s str) -> IResult<&str, AttType<'s>> {
+        let string_type = value(AttType::StringType, StringType::parse);
+        let tokenized_type = map(TokenizedType::parse, AttType::TokenizedType);
+        let enumerated_type = map(EnumeratedType::parse, AttType::EnumeratedType);
+        alt((string_type, tokenized_type, enumerated_type))(input)
+    }
+}
+
+/// [55] StringType ::= 'CDATA'
+///
+/// Matches the literal keyword and yields the unit marker `StringType`.
+impl Parser<'_, StringType> for StringType {
+    fn parse(input: &'_ str) -> IResult<&str, StringType> {
+        map(tag("CDATA"), |_keyword| StringType)(input)
+    }
+}
+
+/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
+///
+/// `alt` commits to the first alternative that matches, so whenever one
+/// keyword is a prefix of another the longer keyword has to be tried first.
+impl Parser<'_, TokenizedType> for TokenizedType {
+    fn parse(input: &'_ str) -> IResult<&str, TokenizedType> {
+        alt((
+            // `ID` is a prefix of `IDREF`, which is a prefix of `IDREFS`:
+            // longest first, otherwise `ID` would match and leave `REF(S)`
+            // behind as unparsed input.
+            value(TokenizedType::IDRefs, tag("IDREFS")),
+            value(TokenizedType::IDRef, tag("IDREF")),
+            value(TokenizedType::ID, tag("ID")),
+            // `ENTITY` and `ENTITIES` diverge at their sixth character, so
+            // their relative order does not matter.
+            value(TokenizedType::Entity, tag("ENTITY")),
+            value(TokenizedType::Entities, tag("ENTITIES")),
+            // `NMTOKEN` is a prefix of `NMTOKENS`: longest first.
+            value(TokenizedType::NMTokens, tag("NMTOKENS")),
+            value(TokenizedType::NMToken, tag("NMTOKEN")),
+        ))(input)
+    }
+}
+
+/// [57] EnumeratedType ::= NotationType | Enumeration
+///
+/// A `NOTATION (...)` list is attempted before a plain enumeration.
+impl<'s> Parser<'s, EnumeratedType<'s>> for EnumeratedType<'s> {
+    fn parse(input: &'s str) -> IResult<&str, EnumeratedType<'s>> {
+        let notation_type = map(NotationType::parse, EnumeratedType::NotationType);
+        let enumeration = map(Enumeration::parse, EnumeratedType::Enumeration);
+        alt((notation_type, enumeration))(input)
+    }
+}
+
+/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
+///
+/// Returns the one or more notation names listed between the parentheses,
+/// in source order.
+impl<'s> Parser<'s, NotationType<'s>> for NotationType<'s> {
+    fn parse(input: &'s str) -> IResult<&str, NotationType<'s>> {
+        map(
+            delimited(
+                tuple((tag("NOTATION"), S::parse, tag("("), opt(S::parse))),
+                pair(
+                    Name::parse,
+                    many0(preceded(
+                        tuple((opt(S::parse), tag("|"), opt(S::parse))),
+                        Name::parse,
+                    )),
+                ),
+                pair(opt(S::parse), tag(")")),
+            ),
+            // Move the parsed names into the result instead of cloning them
+            // through `[Vec<_>]::concat`.
+            |(head, tail)| NotationType(std::iter::once(head).chain(tail).collect()),
+        )(input)
+    }
+}
+
+/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
+///
+/// Returns the one or more name tokens listed between the parentheses, in
+/// source order.
+impl<'s> Parser<'s, Enumeration<'s>> for Enumeration<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Enumeration<'s>> {
+        map(
+            delimited(
+                pair(tag("("), opt(S::parse)),
+                pair(
+                    Nmtoken::parse,
+                    many0(preceded(
+                        tuple((opt(S::parse), tag("|"), opt(S::parse))),
+                        Nmtoken::parse,
+                    )),
+                ),
+                pair(opt(S::parse), tag(")")),
+            ),
+            // Move the parsed tokens into the result instead of cloning them
+            // through `[Vec<_>]::concat`.
+            |(head, tail)| Enumeration(std::iter::once(head).chain(tail).collect()),
+        )(input)
+    }
+}
+
+/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
+///
+/// The two keywords map to their own variants; any attribute value, with or
+/// without a leading `#FIXED`, is wrapped in `DefaultDecl::Fixed`.
+impl<'s> Parser<'s, DefaultDecl<'s>> for DefaultDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, DefaultDecl<'s>> {
+        let required = value(DefaultDecl::Required, tag("#REQUIRED"));
+        let implied = value(DefaultDecl::Implied, tag("#IMPLIED"));
+        // NOTE(review): the optional `#FIXED` marker is discarded, so a plain
+        // default value is indistinguishable from a fixed one in the result.
+        // Confirm this is intended.
+        let default_value = map(
+            preceded(opt(pair(tag("#FIXED"), S::parse)), AttValue::parse),
+            DefaultDecl::Fixed,
+        );
+        alt((required, implied, default_value))(input)
+    }
+}
+
+/// [61] conditionalSect ::= includeSect | ignoreSect
+///
+/// An `INCLUDE` section is attempted before an `IGNORE` section.
+impl<'s> Parser<'s, ConditionalSect<'s>> for ConditionalSect<'s> {
+    fn parse(input: &'s str) -> IResult<&str, ConditionalSect<'s>> {
+        let include_sect = map(IncludeSect::parse, ConditionalSect::IncludeSect);
+        let ignore_sect = map(IgnoreSect::parse, ConditionalSect::IgnoreSect);
+        alt((include_sect, ignore_sect))(input)
+    }
+}
+
+/// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
+///
+/// Returns the external-subset declarations enclosed by the section.
+impl<'s> Parser<'s, IncludeSect<'s>> for IncludeSect<'s> {
+    fn parse(input: &'s str) -> IResult<&str, IncludeSect<'s>> {
+        let opening = tuple((
+            tag("<!["),
+            opt(S::parse),
+            tag("INCLUDE"),
+            opt(S::parse),
+            tag("["),
+        ));
+        let section = delimited(opening, ExtSubsetDecl::parse, tag("]]>"));
+        map(section, IncludeSect)(input)
+    }
+}
+
+/// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
+///
+/// Returns the ignored contents found between the opening `<![IGNORE[` and
+/// the matching `]]>`.
+impl<'s> Parser<'s, IgnoreSect<'s>> for IgnoreSect<'s> {
+    fn parse(input: &'s str) -> IResult<&str, IgnoreSect<'s>> {
+        /// Collects `ignoreSectContents*`.
+        ///
+        /// `IgnoreSectContents` can succeed without consuming anything (an
+        /// empty `Ignore` and no nested sections). `many0` reports an error
+        /// when its inner parser does that, which made every ignore section
+        /// fail just before the closing `]]>`. This loop stops on the first
+        /// non-consuming match instead.
+        fn contents<'a>(mut rest: &'a str) -> IResult<&'a str, Vec<IgnoreSectContents<'a>>> {
+            let mut sections = Vec::new();
+            loop {
+                match IgnoreSectContents::parse(rest) {
+                    Ok((next, section)) if next.len() < rest.len() => {
+                        sections.push(section);
+                        rest = next;
+                    }
+                    // Nothing more to collect: an empty match or a
+                    // recoverable error ends the repetition.
+                    Ok(_) | Err(nom::Err::Error(_)) => return Ok((rest, sections)),
+                    // Incomplete input and hard failures are passed through.
+                    Err(failure) => return Err(failure),
+                }
+            }
+        }
+
+        map(
+            delimited(
+                tuple((
+                    tag("<!["),
+                    opt(S::parse),
+                    tag("IGNORE"),
+                    opt(S::parse),
+                    tag("["),
+                )),
+                contents,
+                tag("]]>"),
+            ),
+            |ignore_sect_contents| IgnoreSect(ignore_sect_contents),
+        )(input)
+    }
+}
+
+/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
+///
+/// Returns the leading ignored text plus a list of
+/// `(nested contents, following ignored text)` pairs, one per nested
+/// `<![ ... ]]>` section.
+impl<'s> Parser<'s, IgnoreSectContents<'s>> for IgnoreSectContents<'s> {
+    fn parse(input: &'s str) -> IResult<&str, IgnoreSectContents<'s>> {
+        let nested = delimited(tag("<!["), IgnoreSectContents::parse, tag("]]>"));
+        let (rest, (ignore, ignore_list)) =
+            pair(Ignore::parse, many0(pair(nested, Ignore::parse)))(input)?;
+        Ok((
+            rest,
+            IgnoreSectContents {
+                ignore,
+                ignore_list,
+            },
+        ))
+    }
+}
+
+/// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
+///
+/// Consumes characters up to, but not including, the next `<![` or `]]>`
+/// and returns the consumed slice, which may be empty.
+impl<'s> Parser<'s, Ignore<'s>> for Ignore<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Ignore<'s>> {
+        // Only peeked at, so the delimiter stays in the remaining input.
+        let boundary = peek(alt((tag("<!["), tag("]]>"))));
+        let ignored = recognize(many_till(Char::parse, boundary));
+        map(ignored, Ignore)(input)
+    }
+}
+
+/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
+///
+/// Returns the digits as written, without decoding them into a character.
+/// At least one digit is required, so `&#;` and `&#x;` are rejected.
+impl<'s> Parser<'s, CharRef<'s>> for CharRef<'s> {
+    fn parse(input: &'s str) -> IResult<&str, CharRef<'s>> {
+        alt((
+            delimited(
+                tag("&#"),
+                // `take_while1` enforces the `+` in the grammar; `take_while`
+                // would accept an empty digit run. For hexadecimal input this
+                // branch fails on the `x` and the next alternative is tried.
+                map(
+                    nom::bytes::streaming::take_while1(|c| matches!(c, '0'..='9')),
+                    |decimal| CharRef::Decimal(decimal),
+                ),
+                tag(";"),
+            ),
+            delimited(
+                tag("&#x"),
+                map(
+                    nom::bytes::streaming::take_while1(
+                        |c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' ),
+                    ),
+                    |hexadecimal| CharRef::Hexadecimal(hexadecimal),
+                ),
+                tag(";"),
+            ),
+        ))(input)
+    }
+}
+
+/// [67] Reference ::= EntityRef | CharRef
+///
+/// A named entity reference is attempted before a character reference.
+impl<'s> Parser<'s, Reference<'s>> for Reference<'s> {
+    fn parse(input: &'s str) -> IResult<&str, Reference<'s>> {
+        let entity_ref = map(EntityRef::parse, Reference::EntityRef);
+        let char_ref = map(CharRef::parse, Reference::CharRef);
+        alt((entity_ref, char_ref))(input)
+    }
+}
+
+/// [68] EntityRef ::= '&' Name ';'
+///
+/// Returns the entity name without the surrounding `&` and `;`.
+impl<'s> Parser<'s, EntityRef<'s>> for EntityRef<'s> {
+    fn parse(input: &'s str) -> IResult<&str, EntityRef<'s>> {
+        let reference = delimited(tag("&"), Name::parse, tag(";"));
+        map(reference, EntityRef)(input)
+    }
+}
+
+/// [69] PEReference ::= '%' Name ';'
+///
+/// Returns the parameter-entity name without the surrounding `%` and `;`.
+impl<'s> Parser<'s, PEReference<'s>> for PEReference<'s> {
+    fn parse(input: &'s str) -> IResult<&str, PEReference<'s>> {
+        let reference = delimited(tag("%"), Name::parse, tag(";"));
+        map(reference, PEReference)(input)
+    }
+}
+
+/// [70] EntityDecl ::= GEDecl | PEDecl
+///
+/// A general entity declaration is attempted before a parameter entity
+/// declaration.
+impl<'s> Parser<'s, EntityDecl<'s>> for EntityDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, EntityDecl<'s>> {
+        let general = map(GEDecl::parse, EntityDecl::GEDecl);
+        let parameter = map(PEDecl::parse, EntityDecl::PEDecl);
+        alt((general, parameter))(input)
+    }
+}
+
+/// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
+///
+/// Returns the entity name and its definition.
+impl<'s> Parser<'s, GEDecl<'s>> for GEDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, GEDecl<'s>> {
+        let (rest, (name, entity_def)) = delimited(
+            pair(tag("<!ENTITY"), S::parse),
+            separated_pair(Name::parse, S::parse, EntityDef::parse),
+            pair(opt(S::parse), tag(">")),
+        )(input)?;
+        Ok((rest, GEDecl { name, entity_def }))
+    }
+}
+
+/// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
+///
+/// Returns the parameter-entity name and its definition.
+impl<'s> Parser<'s, PEDecl<'s>> for PEDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, PEDecl<'s>> {
+        let (rest, (name, pe_def)) = delimited(
+            tuple((tag("<!ENTITY"), S::parse, tag("%"), S::parse)),
+            separated_pair(Name::parse, S::parse, PEDef::parse),
+            pair(opt(S::parse), tag(">")),
+        )(input)?;
+        Ok((rest, PEDecl { name, pe_def }))
+    }
+}
+
+/// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
+///
+/// A literal entity value is attempted first; otherwise an external
+/// identifier with an optional `NDATA` declaration is parsed.
+impl<'s> Parser<'s, EntityDef<'s>> for EntityDef<'s> {
+    fn parse(input: &'s str) -> IResult<&str, EntityDef<'s>> {
+        let literal = map(EntityValue::parse, EntityDef::EntityValue);
+        let external = map(
+            pair(ExternalID::parse, opt(NDataDecl::parse)),
+            |(external_id, ndata_decl)| EntityDef::ExternalID {
+                external_id,
+                ndata_decl,
+            },
+        );
+        alt((literal, external))(input)
+    }
+}
+
+/// [74] PEDef ::= EntityValue | ExternalID
+///
+/// A literal entity value is attempted before an external identifier.
+impl<'s> Parser<'s, PEDef<'s>> for PEDef<'s> {
+    fn parse(input: &'s str) -> IResult<&str, PEDef<'s>> {
+        let literal = map(EntityValue::parse, PEDef::EntityValue);
+        let external = map(ExternalID::parse, PEDef::ExternalID);
+        alt((literal, external))(input)
+    }
+}
+
+/// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
+///
+/// The `SYSTEM` form carries only a system identifier; the `PUBLIC` form
+/// carries a public identifier followed by a system identifier.
+impl<'s> Parser<'s, ExternalID<'s>> for ExternalID<'s> {
+    fn parse(input: &'s str) -> IResult<&str, ExternalID<'s>> {
+        let system = map(
+            preceded(pair(tag("SYSTEM"), S::parse), SystemLiteral::parse),
+            |system_identifier| ExternalID::SYSTEM { system_identifier },
+        );
+        let public = map(
+            preceded(
+                pair(tag("PUBLIC"), S::parse),
+                separated_pair(PubidLiteral::parse, S::parse, SystemLiteral::parse),
+            ),
+            |(public_identifier, system_identifier)| ExternalID::PUBLIC {
+                public_identifier,
+                system_identifier,
+            },
+        );
+        alt((system, public))(input)
+    }
+}
+
+/// [76] NDataDecl ::= S 'NDATA' S Name
+///
+/// Returns the notation name that follows the `NDATA` keyword.
+impl<'s> Parser<'s, NDataDecl<'s>> for NDataDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, NDataDecl<'s>> {
+        let keyword = tuple((S::parse, tag("NDATA"), S::parse));
+        map(preceded(keyword, Name::parse), NDataDecl)(input)
+    }
+}
+
+/// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
+///
+/// The version is optional, the encoding declaration is mandatory.
+impl<'s> Parser<'s, TextDecl<'s>> for TextDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, TextDecl<'s>> {
+        let (rest, (version_info, encoding_decl)) = delimited(
+            tag("<?xml"),
+            pair(
+                opt(VersionInfo::parse),
+                terminated(EncodingDecl::parse, opt(S::parse)),
+            ),
+            tag("?>"),
+        )(input)?;
+        Ok((
+            rest,
+            TextDecl {
+                version_info,
+                encoding_decl,
+            },
+        ))
+    }
+}
+
+/// [78] extParsedEnt ::= TextDecl? content
+///
+/// The text declaration is optional; the content that follows it is always
+/// parsed.
+impl<'s> Parser<'s, ExtParsedEnt<'s>> for ExtParsedEnt<'s> {
+    fn parse(input: &'s str) -> IResult<&str, ExtParsedEnt<'s>> {
+        let (rest, (text_decl, content)) =
+            pair(opt(TextDecl::parse), Content::parse)(input)?;
+        Ok((rest, ExtParsedEnt { text_decl, content }))
+    }
+}
+
+/// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
+///
+/// Returns the encoding name without its quotes; the name may be wrapped in
+/// double or single quotes, but both quotes must be of the same kind.
+impl<'s> Parser<'s, EncodingDecl<'s>> for EncodingDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, EncodingDecl<'s>> {
+        let quoted_name = alt((
+            delimited(char('"'), EncName::parse, char('"')),
+            delimited(char('\''), EncName::parse, char('\'')),
+        ));
+        let declaration = preceded(tuple((S::parse, tag("encoding"), Eq::parse)), quoted_name);
+        map(declaration, EncodingDecl)(input)
+    }
+}
+
+/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
+///
+/// Returns the matched slice: one ASCII letter followed by any number of
+/// ASCII letters, digits, `.`, `_` or `-`.
+impl<'s> Parser<'s, EncName<'s>> for EncName<'s> {
+    fn parse(input: &'s str) -> IResult<&str, EncName<'s>> {
+        let first = satisfy(|c: char| c.is_ascii_alphabetic());
+        let following = many0(satisfy(|c: char| {
+            c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-')
+        }));
+        map(recognize(pair(first, following)), EncName)(input)
+    }
+}
+
+/// [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
+///
+/// Returns the notation name and its identifier. A full external identifier
+/// is attempted before the shorter public-only form.
+impl<'s> Parser<'s, NotationDecl<'s>> for NotationDecl<'s> {
+    fn parse(input: &'s str) -> IResult<&str, NotationDecl<'s>> {
+        let identifier = alt((
+            map(ExternalID::parse, NotationDeclID::External),
+            map(PublicID::parse, NotationDeclID::Public),
+        ));
+        let (rest, (name, id)) = delimited(
+            pair(tag("<!NOTATION"), S::parse),
+            separated_pair(Name::parse, S::parse, identifier),
+            pair(opt(S::parse), tag(">")),
+        )(input)?;
+        Ok((rest, NotationDecl { name, id }))
+    }
+}
+
+/// [83] PublicID ::= 'PUBLIC' S PubidLiteral
+///
+/// Returns the public identifier literal that follows the `PUBLIC` keyword.
+impl<'s> Parser<'s, PublicID<'s>> for PublicID<'s> {
+    fn parse(input: &'s str) -> IResult<&str, PublicID<'s>> {
+        let keyword = pair(tag("PUBLIC"), S::parse);
+        map(preceded(keyword, PubidLiteral::parse), PublicID)(input)
+    }
+}
+
+// Unit tests for a handful of the parsers in this module. Several of them
+// assert `Err::Incomplete`, i.e. the parsers ask for more input rather than
+// treating the end of the buffer as the end of the construct.
+#[cfg(test)]
+mod tests {
+    use std::num::NonZero;
+
+    use super::*;
+
+    /// Character data ends before the first `&` or `]]>`, whichever comes
+    /// first; a lone `]>` is ordinary text.
+    #[test]
+    fn test_char_data() {
+        assert_eq!(
+            Ok(("&def]]>ghi", CharData("abc"))),
+            CharData::parse("abc&def]]>ghi")
+        );
+        assert_eq!(
+            Ok(("]]>ghi", CharData("abcdef"))),
+            CharData::parse("abcdef]]>ghi")
+        );
+        assert_eq!(
+            Ok(("&defghi", CharData("abc"))),
+            CharData::parse("abc&defghi")
+        );
+        assert_eq!(
+            Ok(("]]>def&ghi", CharData("abc"))),
+            CharData::parse("abc]]>def&ghi")
+        );
+        assert_eq!(
+            Ok(("&ghi", CharData("abc]>def"))),
+            CharData::parse("abc]>def&ghi")
+        );
+        // No terminator in the buffer: the parser reports that it needs more
+        // input instead of returning the text seen so far.
+        assert_eq!(
+            Err(Err::Incomplete(nom::Needed::Size(
+                NonZero::new(3usize).unwrap()
+            ))),
+            CharData::parse("abcdefghi")
+        );
+    }
+
+    /// Comments may be empty and may contain single hyphens; an unterminated
+    /// comment is reported as incomplete.
+    #[test]
+    fn test_comment() {
+        assert_eq!(Ok(("", Comment(""))), Comment::parse("<!---->"));
+        assert_eq!(Ok(("", Comment("asdf"))), Comment::parse("<!--asdf-->"));
+        assert_eq!(Ok(("", Comment("as-df"))), Comment::parse("<!--as-df-->"));
+        assert_eq!(
+            Err(Err::Incomplete(nom::Needed::Size(
+                NonZero::new(2usize).unwrap()
+            ))),
+            Comment::parse("<!--asdf")
+        );
+    }
+
+    /// A PI target is a name, except that `xml` in any letter case is
+    /// rejected; names that merely start with `xml` are accepted.
+    #[test]
+    fn test_pi_target() {
+        assert_eq!(Ok((" ", PITarget(Name("asdf")))), PITarget::parse("asdf "));
+        assert_eq!(
+            Ok((" ", PITarget(Name("xmlasdf")))),
+            PITarget::parse("xmlasdf ")
+        );
+        assert_eq!(
+            Err(Err::Error(Error {
+                input: "xml ",
+                code: ErrorKind::Tag
+            })),
+            PITarget::parse("xml ")
+        );
+        assert_eq!(
+            Err(Err::Error(Error {
+                input: "xMl ",
+                code: ErrorKind::Tag
+            })),
+            PITarget::parse("xMl ")
+        );
+    }
+
+    /// A complete CDATA section yields its raw contents, markup included.
+    #[test]
+    fn test_cd_sect() {
+        assert_eq!(
+            Ok(("", CDSect(CData("<greeting>Hello, world!</greeting>")))),
+            CDSect::parse("<![CDATA[<greeting>Hello, world!</greeting>]]>")
+        )
+    }
+
+    /// The CDATA opening delimiter is consumed and the rest left untouched.
+    #[test]
+    fn test_cd_start() {
+        assert_eq!(Ok(("asdf", CDStart)), CDStart::parse("<![CDATA[asdf"))
+    }
+
+    /// CDATA contents run up to, but do not include, the first `]]>`; an
+    /// opening delimiter inside the contents is plain text.
+    #[test]
+    fn test_cdata() {
+        assert_eq!(Ok(("]]>asdf", CData("asdf"))), CData::parse("asdf]]>asdf"));
+        assert_eq!(
+            Ok(("]]>asdf", CData("<![CDATA[asdf"))),
+            CData::parse("<![CDATA[asdf]]>asdf")
+        );
+        assert_eq!(
+            Ok(("]]>asdf", CData("<greeting>Hello, world!</greeting>"))),
+            CData::parse("<greeting>Hello, world!</greeting>]]>asdf")
+        )
+    }
+
+    /// The CDATA closing delimiter is consumed and the rest left untouched.
+    #[test]
+    fn test_cd_end() {
+        assert_eq!(Ok(("asdf", CDEnd)), CDEnd::parse("]]>asdf"))
+    }
+}