about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Libravatar cel 🌸 <cel@blos.sm> 2024-06-27 20:22:05 +0100
committer Libravatar cel 🌸 <cel@blos.sm> 2024-06-27 20:22:05 +0100
commit c08b4504ab326203b2c11abe566e518b6466613a (patch)
tree 0b95361353c6f3c1e15c5f6f6db966baccb404cb
parent fa54b2dd3bd323b857bcd0cf3aa1ba13bd18a7de (diff)
download peanuts-c08b4504ab326203b2c11abe566e518b6466613a.tar.gz
peanuts-c08b4504ab326203b2c11abe566e518b6466613a.tar.bz2
peanuts-c08b4504ab326203b2c11abe566e518b6466613a.zip
namespace parsing
-rw-r--r-- src/lib.rs 2
-rw-r--r-- src/xml/mod.rs (renamed from src/parser.rs) 203
-rw-r--r-- src/xml/parsers.rs 1
3 files changed, 176 insertions, 30 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 8040aec..dcf14fe 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,5 @@
mod element;
mod error;
-pub mod parser;
mod reader;
mod writer;
+pub mod xml;
diff --git a/src/parser.rs b/src/xml/mod.rs
index 266becc..47c1779 100644
--- a/src/parser.rs
+++ b/src/xml/mod.rs
@@ -19,6 +19,105 @@ use nom::{
// output is a rust representation of the input xml
// types could be used for xml production too?
+mod parsers;
+
+/// Name of a namespace-declaring attribute: `xmlns:` plus an NCName, or bare `xmlns`.
+#[derive(Clone, Debug)]
+pub enum NSAttName<'s> {
+    /// The `xmlns:NCName` form.
+    PrefixedAttName(PrefixedAttName<'s>),
+    /// The bare `xmlns` form.
+    DefaultAttName,
+}
+/// [1] NSAttName ::= PrefixedAttName | DefaultAttName
+///
+/// The prefixed form is attempted first; bare `xmlns` is the fallback.
+pub fn ns_att_name(input: &str) -> IResult<&str, NSAttName> {
+    let prefixed = map(prefixed_att_name, NSAttName::PrefixedAttName);
+    let bare = value(NSAttName::DefaultAttName, default_att_name);
+    alt((prefixed, bare))(input)
+}
+
+/// The NCName that follows `xmlns:` in a namespace-declaring attribute name.
+#[derive(Clone, Debug)]
+pub struct PrefixedAttName<'s>(NCName<'s>);
+/// [2] PrefixedAttName ::= 'xmlns:' NCName
+///
+/// Consumes the literal `xmlns:` and wraps the NCName parsed after it.
+pub fn prefixed_att_name(input: &str) -> IResult<&str, PrefixedAttName> {
+    let (rest, name) = preceded(tag("xmlns:"), nc_name)(input)?;
+    Ok((rest, PrefixedAttName(name)))
+}
+
+/// Marker produced when the literal `xmlns` is matched.
+#[derive(Clone, Debug)]
+pub struct DefaultAttName;
+/// [3] DefaultAttName ::= 'xmlns'
+///
+/// Matches the literal `xmlns` and discards the matched text.
+pub fn default_att_name(input: &str) -> IResult<&str, DefaultAttName> {
+    let (rest, _) = tag("xmlns")(input)?;
+    Ok((rest, DefaultAttName))
+}
+
+/// A "non-colonized" name: the matched slice of the input, containing no `:`.
+#[derive(Clone, Debug)]
+pub struct NCName<'s>(&'s str);
+/// [4] NCName ::= Name - (Char* ':' Char*)
+///
+/// Matches one name-start character that is not `:`, followed by zero or more
+/// name characters that are not `:`. Parsing stops, without consuming, at the
+/// first `:` or at the first character that is not a name character, so the
+/// name may be followed by `:`, `=`, `>`, whitespace or the end of input.
+///
+/// The previous `many_till(name_char, peek(char(':')))` only succeeded when a
+/// `:` followed the name, which rejected every unprefixed name and every
+/// local part.
+pub fn nc_name(input: &str) -> IResult<&str, NCName> {
+    map(
+        recognize(pair(
+            // First character: a name-start character other than ':'.
+            recognize(name_start_char).and_then(satisfy(|c| c != ':')),
+            // Remaining characters: name characters other than ':'.
+            many0(recognize(name_char).and_then(satisfy(|c| c != ':'))),
+        )),
+        NCName,
+    )(input)
+}
+
+/// A qualified name: either `Prefix ':' LocalPart` or a lone local part.
+#[derive(Clone, Debug)]
+pub enum QName<'s> {
+    /// Name with a prefix and a local part.
+    PrefixedName(PrefixedName<'s>),
+    /// Name consisting of a local part only.
+    UnprefixedName(UnprefixedName<'s>),
+}
+/// [7] QName ::= PrefixedName | UnprefixedName
+///
+/// The prefixed form is attempted first; the unprefixed form is the fallback.
+pub fn q_name(input: &str) -> IResult<&str, QName> {
+    let prefixed = map(prefixed_name, QName::PrefixedName);
+    let unprefixed = map(unprefixed_name, QName::UnprefixedName);
+    alt((prefixed, unprefixed))(input)
+}
+
+/// A name written as `Prefix ':' LocalPart`.
+#[derive(Clone, Debug)]
+pub struct PrefixedName<'s> {
+    /// The part before the colon.
+    prefix: Prefix<'s>,
+    /// The part after the colon.
+    local_part: LocalPart<'s>,
+}
+/// [8] PrefixedName ::= Prefix ':' LocalPart
+///
+/// Parses a prefix, a single `:` separator (discarded) and a local part.
+pub fn prefixed_name(input: &str) -> IResult<&str, PrefixedName> {
+    let (rest, (ns_prefix, local)) = separated_pair(prefix, char(':'), local_part)(input)?;
+    Ok((
+        rest,
+        PrefixedName {
+            prefix: ns_prefix,
+            local_part: local,
+        },
+    ))
+}
+
+/// A name made of a local part only.
+#[derive(Clone, Debug)]
+pub struct UnprefixedName<'s>(LocalPart<'s>);
+/// [9] UnprefixedName ::= LocalPart
+///
+/// Delegates to `local_part` and wraps its output.
+pub fn unprefixed_name(input: &str) -> IResult<&str, UnprefixedName> {
+    let (rest, local) = local_part(input)?;
+    Ok((rest, UnprefixedName(local)))
+}
+
+/// The prefix component of a prefixed name.
+#[derive(Clone, Debug)]
+pub struct Prefix<'s>(NCName<'s>);
+/// [10] Prefix ::= NCName
+///
+/// Delegates to `nc_name` and wraps its output.
+pub fn prefix(input: &str) -> IResult<&str, Prefix> {
+    let (rest, name) = nc_name(input)?;
+    Ok((rest, Prefix(name)))
+}
+
+/// The local-part component of a qualified name.
+#[derive(Clone, Debug)]
+pub struct LocalPart<'s>(NCName<'s>);
+/// [11] LocalPart ::= NCName
+///
+/// Delegates to `nc_name` and wraps its output.
+pub fn local_part(input: &str) -> IResult<&str, LocalPart> {
+    let (rest, name) = nc_name(input)?;
+    Ok((rest, LocalPart(name)))
+}
+
+// xml spec
+
pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [1] document ::= prolog element Misc*
pub fn document(input: &str) -> IResult<&str, Document> {
@@ -375,17 +474,18 @@ pub fn misc(input: &str) -> IResult<&str, Misc> {
#[derive(Debug)]
pub struct DoctypeDecl<'s> {
- name: &'s str,
+ name: QName<'s>,
external_id: Option<ExternalID<'s>>,
int_subset: Option<IntSubset<'s>>,
}
+/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {
map(
delimited(
pair(tag("<!DOCTYPE"), s),
tuple((
- name,
+ q_name,
opt(preceded(s, external_id)),
preceded(
opt(s),
@@ -522,7 +622,7 @@ pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
// (Productions 33 through 38 have been removed.)
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub enum Element<'s> {
Empty(EmptyElemTag<'s>),
NotEmpty(STag<'s>, Content<'s>, ETag<'s>),
@@ -539,41 +639,67 @@ pub fn element(input: &str) -> IResult<&str, Element> {
))(input)
}
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct STag<'s> {
- name: Name<'s>,
+ name: QName<'s>,
attributes: Vec<Attribute<'s>>,
}
+/// [12] STag ::= '<' QName (S Attribute)* S? '>'
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
pub fn s_tag(input: &str) -> IResult<&str, STag> {
map(
delimited(
tag("<"),
- pair(name, many0(preceded(s, attribute))),
+ pair(q_name, many0(preceded(s, attribute))),
pair(opt(s), tag(">")),
),
|(name, attributes)| STag { name, attributes },
)(input)
}
-pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
-/// [41] Attribute ::= Name Eq AttValue
+/// An attribute inside a tag: a namespace declaration or an ordinary attribute.
+#[derive(Debug, Clone)]
+pub enum Attribute<'s> {
+    /// Attribute whose name parsed as an `NSAttName`.
+    NamespaceDeclaration {
+        ns_name: NSAttName<'s>,
+        value: AttValue<'s>,
+    },
+    /// Attribute whose name parsed as a `QName`.
+    Attribute {
+        name: QName<'s>,
+        value: AttValue<'s>,
+    },
+}
+/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
+///
+/// The namespace-declaration form is attempted first; the `QName` form is
+/// the fallback.
+pub fn attribute(input: &str) -> IResult<&str, Attribute> {
+    let namespace_declaration = map(
+        separated_pair(ns_att_name, eq, att_value),
+        |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value },
+    );
+    let plain_attribute = map(
+        separated_pair(q_name, eq, att_value),
+        |(name, value)| Attribute::Attribute { name, value },
+    );
+    alt((namespace_declaration, plain_attribute))(input)
+}
+// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
+// [41] Attribute ::= Name Eq AttValue
+// pub fn attribute(input: &str) -> IResult<&str, Attribute> {
+// separated_pair(name, eq, att_value)(input)
+// }
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct ETag<'s> {
- name: Name<'s>,
+ name: QName<'s>,
}
+/// [13] ETag ::= '</' QName S? '>'
/// [42] ETag ::= '</' Name S? '>'
pub fn e_tag(input: &str) -> IResult<&str, ETag> {
- map(delimited(tag("</"), name, pair(opt(s), tag(">"))), |name| {
- ETag { name }
- })(input)
+ map(
+ delimited(tag("</"), q_name, pair(opt(s), tag(">"))),
+ |name| ETag { name },
+ )(input)
}
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub enum ContentItem<'s> {
// CharData(&'s str),
Element(Element<'s>),
@@ -582,7 +708,7 @@ pub enum ContentItem<'s> {
PI(PI<'s>),
Comment(Comment<'s>),
}
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct Content<'s> {
char_data: Option<CharData<'s>>,
content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
@@ -607,17 +733,18 @@ pub fn content(input: &str) -> IResult<&str, Content> {
)(input)
}
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct EmptyElemTag<'s> {
- name: Name<'s>,
+ name: QName<'s>,
attributes: Vec<Attribute<'s>>,
}
+/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
map(
delimited(
tag("<"),
- pair(name, many0(preceded(s, attribute))),
+ pair(q_name, many0(preceded(s, attribute))),
pair(opt(s), tag("/>")),
),
|(name, attributes)| EmptyElemTag { name, attributes },
@@ -626,15 +753,16 @@ pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
#[derive(Debug)]
pub struct Elementdecl<'s> {
- name: Name<'s>,
+ name: QName<'s>,
contentspec: Contentspec<'s>,
}
+/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
pub fn elementdecl(input: &str) -> IResult<&str, Elementdecl> {
map(
delimited(
pair(tag("<!ELEMENT"), s),
- separated_pair(name, s, contentspec),
+ separated_pair(q_name, s, contentspec),
pair(opt(s), tag(">")),
),
|(name, contentspec)| Elementdecl { name, contentspec },
@@ -709,7 +837,7 @@ pub fn children(input: &str) -> IResult<&str, Children> {
#[derive(Clone, Debug)]
pub enum CpKind<'s> {
- Name(Name<'s>),
+ Name(QName<'s>),
Choice(Choice<'s>),
Seq(Seq<'s>),
}
@@ -718,12 +846,13 @@ pub struct Cp<'s> {
kind: CpKind<'s>,
occurence: Occurence,
}
+/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')?
/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
pub fn cp(input: &str) -> IResult<&str, Cp> {
map(
pair(
alt((
- map(name, |name| CpKind::Name(name)),
+ map(q_name, |name| CpKind::Name(name)),
map(choice, |choice| CpKind::Choice(choice)),
map(seq, |seq| CpKind::Seq(seq)),
)),
@@ -769,14 +898,15 @@ pub fn seq(input: &str) -> IResult<&str, Seq> {
// always contains #PCDATA
#[derive(Clone, Debug)]
-pub struct Mixed<'s>(Vec<Name<'s>>);
+pub struct Mixed<'s>(Vec<QName<'s>>);
+/// [19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'
/// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
pub fn mixed(input: &str) -> IResult<&str, Mixed> {
alt((
map(
delimited(
tuple((tag("("), s, tag("#PCDATA"))),
- many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)),
+ many0(preceded(tuple((opt(s), tag("|"), opt(s))), q_name)),
pair(opt(s), tag(")*")),
),
|names| Mixed(names),
@@ -790,15 +920,16 @@ pub fn mixed(input: &str) -> IResult<&str, Mixed> {
#[derive(Debug)]
pub struct AttlistDecl<'s> {
- element_type: Name<'s>,
+ element_type: QName<'s>,
att_defs: Vec<AttDef<'s>>,
}
+/// [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'
/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {
map(
delimited(
pair(tag("<!ATTLIST"), s),
- pair(name, many0(att_def)),
+ pair(q_name, many0(att_def)),
pair(opt(s), tag(">")),
),
|(element_type, att_defs)| AttlistDecl {
@@ -809,16 +940,30 @@ pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {
}
#[derive(Debug)]
+pub enum AttDefName<'s> {
+ QName(QName<'s>),
+ NSAttName(NSAttName<'s>),
+}
+#[derive(Debug)]
pub struct AttDef<'s> {
- name: Name<'s>,
+ name: AttDefName<'s>,
att_type: AttType<'s>,
default_decl: DefaultDecl<'s>,
}
+/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
/// [53] AttDef ::= S Name S AttType S DefaultDecl
pub fn att_def(input: &str) -> IResult<&str, AttDef> {
map(
tuple((
- preceded(s, name),
+ preceded(
+ s,
+ alt((
+ map(q_name, |q_name| AttDefName::QName(q_name)),
+ map(ns_att_name, |ns_att_name| {
+ AttDefName::NSAttName(ns_att_name)
+ }),
+ )),
+ ),
preceded(s, att_type),
preceded(s, default_decl),
)),
diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/xml/parsers.rs
@@ -0,0 +1 @@
+