diff options
author | 2024-11-19 16:07:34 +0000 | |
---|---|---|
committer | 2024-11-19 16:07:34 +0000 | |
commit | c2a84072ac8c393a28711e118942da7b0377d895 (patch) | |
tree | 45bb09119935b29cebad87ae14bba4c00a620ffe | |
parent | c8ed16a2d1d1f1b5278936eeadeae87da7cb104e (diff) | |
download | peanuts-c2a84072ac8c393a28711e118942da7b0377d895.tar.gz peanuts-c2a84072ac8c393a28711e118942da7b0377d895.tar.bz2 peanuts-c2a84072ac8c393a28711e118942da7b0377d895.zip |
fix attribute namespace resolution
-rw-r--r-- | src/element.rs | 4 | ||||
-rw-r--r-- | src/reader.rs | 436 |
2 files changed, 180 insertions, 260 deletions
diff --git a/src/element.rs b/src/element.rs index 4c39c6a..6d2a6b7 100644 --- a/src/element.rs +++ b/src/element.rs @@ -19,7 +19,7 @@ pub struct NamespaceDeclaration { #[derive(PartialEq, Eq, Hash, Clone, Debug)] pub struct Name { pub namespace: String, - pub name: String, + pub local_name: String, } #[derive(Debug)] @@ -40,7 +40,7 @@ pub struct Element { // namespace: String, // hashmap of explicit namespace declarations on the element itself only // possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader. - // pub namespace_decl: HashSet<Namespace>, + pub namespace_declarations: HashSet<NamespaceDeclaration>, // attributes can be in a different namespace than the element. how to make sure they are valid? // maybe include the namespace instead of or with the prefix // you can calculate the prefix from the namespaced name and the current writer context diff --git a/src/reader.rs b/src/reader.rs index 654ca2a..a05e73b 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -277,10 +277,12 @@ where impl<R> Reader<R> { fn start_tag_from_xml( depth: &mut Vec<Name>, - namespaces: &mut Vec<HashSet<NamespaceDeclaration>>, + namespace_declarations: &mut Vec<HashSet<NamespaceDeclaration>>, s_tag: xml::STag, ) -> Result<Element> { - let mut namespace_declarations = HashSet::new(); + // namespace declarations on element + + let mut element_namespace_declarations = HashSet::new(); for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| { if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { Some((ns_name, value)) @@ -298,20 +300,49 @@ impl<R> Reader<R> { prefix, namespace: namespace.process()?, }; - if !namespace_declarations.insert(namespace.clone()) { + if !element_namespace_declarations.insert(namespace.clone()) { return Err(Error::DuplicateNameSpaceDeclaration(namespace)); } } - // all namespaces available to the element (from both parent elements and element itself) - let namespace_stack: Vec<&NamespaceDeclaration> = namespaces + // all namespaces available in the element scope (from both parent elements and element itself) + let namespace_declarations_stack: Vec<&NamespaceDeclaration> = namespace_declarations .iter() .flatten() - .chain(namespace_declarations.iter()) + .chain(element_namespace_declarations.iter()) .collect(); - let mut attributes = HashMap::new(); + // element name and default attribute namespace + let element_namespace_declaration; + let element_local_name = s_tag.name.local_part().to_string(); + + match s_tag.name.prefix() { + Some(prefix) => { + element_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace| namespace.prefix.as_deref() == Some(prefix)); + } + None => { + element_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace| namespace.prefix == None); + } + } + + let element_default_namespace = element_namespace_declaration + .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))? + .namespace + .clone(); + + let element_name = Name { + namespace: element_default_namespace, + local_name: element_local_name, + }; + + // attributes + + let mut attributes = HashMap::new(); for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| { if let xml::Attribute::Attribute { name, value } = attribute { Some((name, value)) @@ -319,28 +350,26 @@ impl<R> Reader<R> { None } }) { - let namespace; - let attribute_name; - match q_name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack.iter().rfind(|namespace| { - namespace.prefix.as_deref() == Some(**prefixed_name.prefix) - }); - attribute_name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - attribute_name = unprefixed_name.to_string(); + let attribute_namespace_declaration; + let attribute_local_name = q_name.local_part().to_string(); + match q_name.prefix() { + Some(prefix) => { + attribute_namespace_declaration = + namespace_declarations_stack + .iter() + .rfind(|namespace_declaration| { + namespace_declaration.prefix.as_deref() == Some(prefix) + }); } + None => attribute_namespace_declaration = element_namespace_declaration, } - if let Some(namespace_declaration) = namespace { + if let Some(namespace_declaration) = attribute_namespace_declaration { let name = Name { namespace: namespace_declaration.namespace.clone(), - name: attribute_name, + local_name: attribute_local_name, }; let value = value.process()?; + // check for duplicate attribute if let Some(_value) = attributes.insert(name, value) { return Err(Error::DuplicateAttribute(q_name.to_string())); } @@ -349,38 +378,13 @@ impl<R> Reader<R> { } } - let name; - let namespace; - match &s_tag.name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix.as_deref() == Some(**prefixed_name.prefix)); - name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - name = unprefixed_name.to_string(); - } - } + depth.push(element_name.clone()); - let namespace_declaration = (*namespace - .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?) - .clone(); - - let name = Name { - namespace: namespace_declaration.namespace, - name, - }; - - depth.push(name.clone()); - - namespaces.push(namespace_declarations.clone()); + namespace_declarations.push(element_namespace_declarations.clone()); return Ok(Element { - name, + name: element_name, + namespace_declarations: element_namespace_declarations, attributes, content: Vec::new(), }); @@ -418,13 +422,16 @@ impl<R> Reader<R> { name = unprefixed_name.to_string(); } } - let e_tag_name = Name { namespace, name }; + let e_tag_name = Name { + namespace, + local_name: name, + }; if s_tag_name == e_tag_name { namespaces.pop(); return Ok(()); } else { return Err(Error::MismatchedEndTag( - s_tag_name.name, + s_tag_name.local_name, e_tag.name.to_string(), )); } @@ -434,240 +441,153 @@ impl<R> Reader<R> { } fn element_from_xml( - namespaces: &mut Vec<HashSet<NamespaceDeclaration>>, + namespace_declarations: &mut Vec<HashSet<NamespaceDeclaration>>, element: xml::Element, ) -> Result<Element> { + let xml_name; + let xml_attributes; + let xml_content; + let xml_e_name; + match element { xml::Element::Empty(empty_elem_tag) => { - let mut namespace_declarations = HashSet::new(); - for (prefix, namespace) in - empty_elem_tag.attributes.iter().filter_map(|attribute| { - if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { - Some((ns_name, value)) - } else { - None - } - }) - { - let prefix = match prefix { - xml::NSAttName::PrefixedAttName(prefixed_att_name) => { - Some(prefixed_att_name.to_string()) - } - xml::NSAttName::DefaultAttName => None, - }; - let namespace = NamespaceDeclaration { - prefix, - namespace: namespace.process()?, - }; - if !namespace_declarations.insert(namespace.clone()) { - return Err(Error::DuplicateNameSpaceDeclaration(namespace)); - } - } - - // all namespaces available to the element (from both parent elements and element itself) - let namespace_stack: Vec<&NamespaceDeclaration> = namespaces - .iter() - .flatten() - .chain(namespace_declarations.iter()) - .collect(); + xml_name = empty_elem_tag.name; + xml_attributes = empty_elem_tag.attributes; + xml_content = None; + xml_e_name = None; + } + xml::Element::NotEmpty(s_tag, content, e_tag) => { + xml_name = s_tag.name; + xml_attributes = s_tag.attributes; + xml_content = Some(content); + xml_e_name = Some(e_tag.name); + } + } - let mut attributes = HashMap::new(); + // namespace declarations on element - for (q_name, value) in empty_elem_tag.attributes.iter().filter_map(|attribute| { - if let xml::Attribute::Attribute { name, value } = attribute { - Some((name, value)) - } else { - None - } - }) { - let namespace; - let attribute_name; - match q_name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack.iter().rfind(|namespace| { - namespace.prefix.as_deref() == Some(**prefixed_name.prefix) - }); - attribute_name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - attribute_name = unprefixed_name.to_string(); - } - } - if let Some(namespace) = namespace { - let namespace = (*namespace).clone(); - let name = Name { - namespace: namespace.namespace, - name: attribute_name, - }; - let value = value.process()?; - if let Some(_value) = attributes.insert(name, value) { - return Err(Error::DuplicateAttribute(q_name.to_string())); - } - } else { - return Err(Error::UnqualifiedNamespace(q_name.to_string())); - } + let mut element_namespace_declarations = HashSet::new(); + for (prefix, namespace) in xml_attributes.iter().filter_map(|attribute| { + if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { + Some((ns_name, value)) + } else { + None + } + }) { + let prefix = match prefix { + xml::NSAttName::PrefixedAttName(prefixed_att_name) => { + Some(prefixed_att_name.to_string()) } + xml::NSAttName::DefaultAttName => None, + }; + let namespace = NamespaceDeclaration { + prefix, + namespace: namespace.process()?, + }; + if !element_namespace_declarations.insert(namespace.clone()) { + return Err(Error::DuplicateNameSpaceDeclaration(namespace)); + } + } - let name; - let namespace; - match &empty_elem_tag.name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack.iter().rfind(|namespace| { - namespace.prefix.as_deref() == Some(**prefixed_name.prefix) - }); - name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - name = unprefixed_name.to_string(); - } - } + // all namespaces available in the element scope (from both parent elements and element itself) + let namespace_declarations_stack: Vec<&NamespaceDeclaration> = namespace_declarations + .iter() + .flatten() + .chain(element_namespace_declarations.iter()) + .collect(); - let namespace = (*namespace - .ok_or_else(|| Error::UnqualifiedNamespace(empty_elem_tag.name.to_string()))?) - .clone(); + // element name and default attribute namespace - let name = Name { - namespace: namespace.namespace, - name, - }; + let element_namespace_declaration; + let element_local_name = xml_name.local_part().to_string(); - return Ok(Element { - name, - attributes, - content: Vec::new(), - }); + match xml_name.prefix() { + Some(prefix) => { + element_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace| namespace.prefix.as_deref() == Some(prefix)); } - xml::Element::NotEmpty(s_tag, content, e_tag) => { - if s_tag.name != e_tag.name { - return Err(Error::MismatchedEndTag( - s_tag.name.to_string(), - e_tag.name.to_string(), - )); - } - let mut namespace_declarations = HashSet::new(); - for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| { - if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { - Some((ns_name, value)) - } else { - None - } - }) { - let prefix = match prefix { - xml::NSAttName::PrefixedAttName(prefixed_att_name) => { - Some(prefixed_att_name.to_string()) - } - xml::NSAttName::DefaultAttName => None, - }; - let namespace = NamespaceDeclaration { - prefix, - namespace: namespace.process()?, - }; - if !namespace_declarations.insert(namespace.clone()) { - return Err(Error::DuplicateNameSpaceDeclaration(namespace)); - } - } - - // all namespaces available to the element (from both parent elements and element itself) - let namespace_stack: Vec<&NamespaceDeclaration> = namespaces + None => { + element_namespace_declaration = namespace_declarations_stack .iter() - .flatten() - .chain(namespace_declarations.iter()) - .collect(); + .rfind(|namespace| namespace.prefix == None); + } + } - let mut attributes = HashMap::new(); + let element_default_namespace = element_namespace_declaration + .ok_or_else(|| Error::UnqualifiedNamespace(xml_name.to_string()))? + .namespace + .clone(); - for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| { - if let xml::Attribute::Attribute { name, value } = attribute { - Some((name, value)) - } else { - None - } - }) { - let namespace; - let attribute_name; - match q_name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack.iter().rfind(|namespace| { - namespace.prefix.as_deref() == Some(**prefixed_name.prefix) - }); - attribute_name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - attribute_name = unprefixed_name.to_string(); - } - } - if let Some(namespace) = namespace { - let namespace = (*namespace).clone(); - let name = Name { - namespace: namespace.namespace, - name: attribute_name, - }; - let value = value.process()?; - if let Some(_value) = attributes.insert(name, value) { - return Err(Error::DuplicateAttribute(q_name.to_string())); - } - } else { - return Err(Error::UnqualifiedNamespace(q_name.to_string())); - } - } + let element_name = Name { + namespace: element_default_namespace, + local_name: element_local_name, + }; - let name; - let namespace; - match &s_tag.name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack.iter().rfind(|namespace| { - namespace.prefix.as_deref() == Some(**prefixed_name.prefix) - }); - name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack + // attributes + + let mut attributes = HashMap::new(); + for (q_name, value) in xml_attributes.iter().filter_map(|attribute| { + if let xml::Attribute::Attribute { name, value } = attribute { + Some((name, value)) + } else { + None + } + }) { + let attribute_namespace_declaration; + let attribute_local_name = q_name.local_part().to_string(); + match q_name.prefix() { + Some(prefix) => { + attribute_namespace_declaration = + namespace_declarations_stack .iter() - .rfind(|namespace| namespace.prefix == None); - name = unprefixed_name.to_string(); - } + .rfind(|namespace_declaration| { + namespace_declaration.prefix.as_deref() == Some(prefix) + }); } - - let namespace = (*namespace - .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?) - .clone(); - + None => attribute_namespace_declaration = element_namespace_declaration, + } + if let Some(namespace_declaration) = attribute_namespace_declaration { let name = Name { - namespace: namespace.namespace, - name, + namespace: namespace_declaration.namespace.clone(), + local_name: attribute_local_name, }; + let value = value.process()?; + // check for duplicate attribute + if let Some(_value) = attributes.insert(name, value) { + return Err(Error::DuplicateAttribute(q_name.to_string())); + } + } else { + return Err(Error::UnqualifiedNamespace(q_name.to_string())); + } + } - namespaces.push(namespace_declarations.clone()); - - let content = Self::content_from_xml(namespaces, content)?; + let content; + if let Some(xml_content) = xml_content { + namespace_declarations.push(element_namespace_declarations.clone()); - namespaces.pop(); + content = Self::content_from_xml(namespace_declarations, xml_content)?; - return Ok(Element { - name, - attributes, - content, - }); - } + namespace_declarations.pop(); + } else { + content = Vec::new(); } + + return Ok(Element { + name: element_name, + namespace_declarations: element_namespace_declarations, + attributes, + content, + }); } fn content_from_xml( namespaces: &mut Vec<HashSet<NamespaceDeclaration>>, - element: xml::Content, + xml_content: xml::Content, ) -> Result<Vec<Content>> { let mut content = Vec::new(); - let mut text = element.char_data.map(|str| String::from(*str)); - for (content_item, char_data) in element.content { + let mut text = xml_content.char_data.map(|str| String::from(*str)); + for (content_item, char_data) in xml_content.content { match content_item { xml::ContentItem::Element(element) => { text.map(|text| content.push(Content::Text(text))); |