diff options
| author | 2024-06-27 20:22:05 +0100 | |
|---|---|---|
| committer | 2024-06-27 20:22:05 +0100 | |
| commit | c08b4504ab326203b2c11abe566e518b6466613a (patch) | |
| tree | 0b95361353c6f3c1e15c5f6f6db966baccb404cb | |
| parent | fa54b2dd3bd323b857bcd0cf3aa1ba13bd18a7de (diff) | |
| download | peanuts-c08b4504ab326203b2c11abe566e518b6466613a.tar.gz peanuts-c08b4504ab326203b2c11abe566e518b6466613a.tar.bz2 peanuts-c08b4504ab326203b2c11abe566e518b6466613a.zip | |
namespace parsing
Diffstat (limited to '')
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | src/xml/mod.rs (renamed from src/parser.rs) | 203 | ||||
| -rw-r--r-- | src/xml/parsers.rs | 1 | 
3 files changed, 176 insertions, 30 deletions
| @@ -1,5 +1,5 @@  mod element;  mod error; -pub mod parser;  mod reader;  mod writer; +pub mod xml; diff --git a/src/parser.rs b/src/xml/mod.rs index 266becc..47c1779 100644 --- a/src/parser.rs +++ b/src/xml/mod.rs @@ -19,6 +19,105 @@ use nom::{  // output is a rust representation of the input xml  // types could be used for xml production too? +mod parsers; + +#[derive(Clone, Debug)] +pub enum NSAttName<'s> { +    PrefixedAttName(PrefixedAttName<'s>), +    DefaultAttName, +} +/// [1]   	NSAttName	   ::=   	PrefixedAttName | DefaultAttName +pub fn ns_att_name(input: &str) -> IResult<&str, NSAttName> { +    alt(( +        map(prefixed_att_name, |prefixed_att_name| { +            NSAttName::PrefixedAttName(prefixed_att_name) +        }), +        value(NSAttName::DefaultAttName, default_att_name), +    ))(input) +} + +#[derive(Clone, Debug)] +pub struct PrefixedAttName<'s>(NCName<'s>); +/// [2]   	PrefixedAttName	   ::=   	'xmlns:' NCName +pub fn prefixed_att_name(input: &str) -> IResult<&str, PrefixedAttName> { +    map(preceded(tag("xmlns:"), nc_name), |nc_name| { +        PrefixedAttName(nc_name) +    })(input) +} + +#[derive(Clone, Debug)] +pub struct DefaultAttName; +/// [3]   	DefaultAttName	   ::=   	'xmlns'; +pub fn default_att_name(input: &str) -> IResult<&str, DefaultAttName> { +    value(DefaultAttName, tag("xmlns"))(input) +} + +#[derive(Clone, Debug)] +pub struct NCName<'s>(&'s str); +/// [4]   	NCName	   ::=   	Name - (Char* ':' Char*) +pub fn nc_name(input: &str) -> IResult<&str, NCName> { +    map( +        recognize(pair( +            recognize(name_start_char).and_then(satisfy(|c| c != ':')), +            many_till(name_char, peek(char(':'))), +        )), +        |nc_name| NCName(nc_name), +    )(input) +} + +#[derive(Clone, Debug)] +pub enum QName<'s> { +    PrefixedName(PrefixedName<'s>), +    UnprefixedName(UnprefixedName<'s>), +} +/// [7]   	QName	   ::=   	PrefixedName | UnprefixedName +pub fn q_name(input: &str) -> IResult<&str, QName> { +    alt(( +        map(prefixed_name, |prefixed_name| { +            QName::PrefixedName(prefixed_name) +        }), +        map(unprefixed_name, |unprefixed_name| { +            QName::UnprefixedName(unprefixed_name) +        }), +    ))(input) +} + +#[derive(Clone, Debug)] +pub struct PrefixedName<'s> { +    prefix: Prefix<'s>, +    local_part: LocalPart<'s>, +} +/// [8]   	PrefixedName	   ::=   	Prefix ':' LocalPart +pub fn prefixed_name(input: &str) -> IResult<&str, PrefixedName> { +    map( +        separated_pair(prefix, char(':'), local_part), +        |(prefix, local_part)| PrefixedName { prefix, local_part }, +    )(input) +} + +#[derive(Clone, Debug)] +pub struct UnprefixedName<'s>(LocalPart<'s>); +/// [9]   	UnprefixedName	   ::=   	LocalPart +pub fn unprefixed_name(input: &str) -> IResult<&str, UnprefixedName> { +    map(local_part, |local_part| UnprefixedName(local_part))(input) +} + +#[derive(Clone, Debug)] +pub struct Prefix<'s>(NCName<'s>); +/// [10]   	Prefix	   ::=   	NCName +pub fn prefix(input: &str) -> IResult<&str, Prefix> { +    map(nc_name, |nc_name| Prefix(nc_name))(input) +} + +#[derive(Clone, Debug)] +pub struct LocalPart<'s>(NCName<'s>); +/// [11]   	LocalPart	   ::=   	NCName +pub fn local_part(input: &str) -> IResult<&str, LocalPart> { +    map(nc_name, |nc_name| LocalPart(nc_name))(input) +} + +// xml spec +  pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);  /// [1]   	document	   ::=   	prolog element Misc*  pub fn document(input: &str) -> IResult<&str, Document> { @@ -375,17 +474,18 @@ pub fn misc(input: &str) -> IResult<&str, Misc> {  #[derive(Debug)]  pub struct DoctypeDecl<'s> { -    name: &'s str, +    name: QName<'s>,      external_id: Option<ExternalID<'s>>,      int_subset: Option<IntSubset<'s>>,  } +/// [16]   	doctypedecl	   ::=   	'<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'  /// [28]   	doctypedecl	   ::=   	'<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'  pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {      map(          delimited(              pair(tag("<!DOCTYPE"), s),              tuple(( -                name, +                q_name,                  opt(preceded(s, external_id)),                  preceded(                      opt(s), @@ -522,7 +622,7 @@ pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {  // (Productions 33 through 38 have been removed.) -#[derive(Debug)] +#[derive(Debug, Clone)]  pub enum Element<'s> {      Empty(EmptyElemTag<'s>),      NotEmpty(STag<'s>, Content<'s>, ETag<'s>), @@ -539,41 +639,67 @@ pub fn element(input: &str) -> IResult<&str, Element> {      ))(input)  } -#[derive(Debug)] +#[derive(Debug, Clone)]  pub struct STag<'s> { -    name: Name<'s>, +    name: QName<'s>,      attributes: Vec<Attribute<'s>>,  } +/// [12]   	STag	   ::=   	'<' QName (S Attribute)* S? '>'  /// [40]   	STag	   ::=   	'<' Name (S Attribute)* S? '>'  pub fn s_tag(input: &str) -> IResult<&str, STag> {      map(          delimited(              tag("<"), -            pair(name, many0(preceded(s, attribute))), +            pair(q_name, many0(preceded(s, attribute))),              pair(opt(s), tag(">")),          ),          |(name, attributes)| STag { name, attributes },      )(input)  } -pub type Attribute<'s> = (Name<'s>, AttValue<'s>); -/// [41]   	Attribute	   ::=   	Name Eq AttValue +#[derive(Debug, Clone)] +pub enum Attribute<'s> { +    NamespaceDeclaration { +        ns_name: NSAttName<'s>, +        value: AttValue<'s>, +    }, +    Attribute { +        name: QName<'s>, +        value: AttValue<'s>, +    }, +} +/// [15]   	Attribute	   ::=   	NSAttName Eq AttValue | QName Eq AttValue  pub fn attribute(input: &str) -> IResult<&str, Attribute> { -    separated_pair(name, eq, att_value)(input) +    alt(( +        map( +            separated_pair(ns_att_name, eq, att_value), +            |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value }, +        ), +        map(separated_pair(q_name, eq, att_value), |(name, value)| { +            Attribute::Attribute { name, value } +        }), +    ))(input)  } +// pub type Attribute<'s> = (Name<'s>, AttValue<'s>); +/// [41]   	Attribute	   ::=   	Name Eq AttValue +// pub fn attribute(input: &str) -> IResult<&str, Attribute> { +//     separated_pair(name, eq, att_value)(input) +// } -#[derive(Debug)] +#[derive(Debug, Clone)]  pub struct ETag<'s> { -    name: Name<'s>, +    name: QName<'s>,  } +/// [13]   	ETag	   ::=   	'</' QName S? '>'  /// [42]   	ETag	   ::=   	'</' Name S? '>'  pub fn e_tag(input: &str) -> IResult<&str, ETag> { -    map(delimited(tag("</"), name, pair(opt(s), tag(">"))), |name| { -        ETag { name } -    })(input) +    map( +        delimited(tag("</"), q_name, pair(opt(s), tag(">"))), +        |name| ETag { name }, +    )(input)  } -#[derive(Debug)] +#[derive(Debug, Clone)]  pub enum ContentItem<'s> {      // CharData(&'s str),      Element(Element<'s>), @@ -582,7 +708,7 @@ pub enum ContentItem<'s> {      PI(PI<'s>),      Comment(Comment<'s>),  } -#[derive(Debug)] +#[derive(Debug, Clone)]  pub struct Content<'s> {      char_data: Option<CharData<'s>>,      content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>, @@ -607,17 +733,18 @@ pub fn content(input: &str) -> IResult<&str, Content> {      )(input)  } -#[derive(Debug)] +#[derive(Debug, Clone)]  pub struct EmptyElemTag<'s> { -    name: Name<'s>, +    name: QName<'s>,      attributes: Vec<Attribute<'s>>,  } +/// [14]   	EmptyElemTag	   ::=   	'<' QName (S Attribute)* S? '/>'  /// [44]   	EmptyElemTag	   ::=   	'<' Name (S Attribute)* S? '/>'	[WFC: Unique Att Spec]  pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {      map(          delimited(              tag("<"), -            pair(name, many0(preceded(s, attribute))), +            pair(q_name, many0(preceded(s, attribute))),              pair(opt(s), tag("/>")),          ),          |(name, attributes)| EmptyElemTag { name, attributes }, @@ -626,15 +753,16 @@ pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {  #[derive(Debug)]  pub struct Elementdecl<'s> { -    name: Name<'s>, +    name: QName<'s>,      contentspec: Contentspec<'s>,  } +/// [17]   	elementdecl	   ::=   	'<!ELEMENT' S QName S contentspec S? '>'  /// [45]   	elementdecl	   ::=   	'<!ELEMENT' S Name S contentspec S? '>'  pub fn elementdecl(input: &str) -> IResult<&str, Elementdecl> {      map(          delimited(              pair(tag("<!ELEMENT"), s), -            separated_pair(name, s, contentspec), +            separated_pair(q_name, s, contentspec),              pair(opt(s), tag(">")),          ),          |(name, contentspec)| Elementdecl { name, contentspec }, @@ -709,7 +837,7 @@ pub fn children(input: &str) -> IResult<&str, Children> {  #[derive(Clone, Debug)]  pub enum CpKind<'s> { -    Name(Name<'s>), +    Name(QName<'s>),      Choice(Choice<'s>),      Seq(Seq<'s>),  } @@ -718,12 +846,13 @@ pub struct Cp<'s> {      kind: CpKind<'s>,      occurence: Occurence,  } +/// [18]   	cp	   ::=   	(QName | choice | seq) ('?' | '*' | '+')?  /// [48]   	cp	   ::=   	(Name | choice | seq) ('?' | '*' | '+')?  pub fn cp(input: &str) -> IResult<&str, Cp> {      map(          pair(              alt(( -                map(name, |name| CpKind::Name(name)), +                map(q_name, |name| CpKind::Name(name)),                  map(choice, |choice| CpKind::Choice(choice)),                  map(seq, |seq| CpKind::Seq(seq)),              )), @@ -769,14 +898,15 @@ pub fn seq(input: &str) -> IResult<&str, Seq> {  // always contains #PCDATA  #[derive(Clone, Debug)] -pub struct Mixed<'s>(Vec<Name<'s>>); +pub struct Mixed<'s>(Vec<QName<'s>>); +/// [19]   	Mixed	   ::=   	'(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'  /// [51]   	Mixed	   ::=   	'(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'  pub fn mixed(input: &str) -> IResult<&str, Mixed> {      alt((          map(              delimited(                  tuple((tag("("), s, tag("#PCDATA"))), -                many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)), +                many0(preceded(tuple((opt(s), tag("|"), opt(s))), q_name)),                  pair(opt(s), tag(")*")),              ),              |names| Mixed(names), @@ -790,15 +920,16 @@ pub fn mixed(input: &str) -> IResult<&str, Mixed> {  #[derive(Debug)]  pub struct AttlistDecl<'s> { -    element_type: Name<'s>, +    element_type: QName<'s>,      att_defs: Vec<AttDef<'s>>,  } +/// [20]   	AttlistDecl	   ::=   	'<!ATTLIST' S QName AttDef* S? '>'  /// [52]   	AttlistDecl	   ::=   	'<!ATTLIST' S Name AttDef* S? '>'  pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {      map(          delimited(              pair(tag("<!ATTLIST"), s), -            pair(name, many0(att_def)), +            pair(q_name, many0(att_def)),              pair(opt(s), tag(">")),          ),          |(element_type, att_defs)| AttlistDecl { @@ -809,16 +940,30 @@ pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {  }  #[derive(Debug)] +pub enum AttDefName<'s> { +    QName(QName<'s>), +    NSAttName(NSAttName<'s>), +} +#[derive(Debug)]  pub struct AttDef<'s> { -    name: Name<'s>, +    name: AttDefName<'s>,      att_type: AttType<'s>,      default_decl: DefaultDecl<'s>,  } +/// [21]   	AttDef	   ::=   	S (QName | NSAttName) S AttType S DefaultDecl  /// [53]   	AttDef	   ::=   	S Name S AttType S DefaultDecl  pub fn att_def(input: &str) -> IResult<&str, AttDef> {      map(          tuple(( -            preceded(s, name), +            preceded( +                s, +                alt(( +                    map(q_name, |q_name| AttDefName::QName(q_name)), +                    map(ns_att_name, |ns_att_name| { +                        AttDefName::NSAttName(ns_att_name) +                    }), +                )), +            ),              preceded(s, att_type),              preceded(s, default_decl),          )), diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/xml/parsers.rs @@ -0,0 +1 @@ + | 
