diff --git a/lib/lutaml/model.rb b/lib/lutaml/model.rb index 9564dba8..60141b6a 100644 --- a/lib/lutaml/model.rb +++ b/lib/lutaml/model.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "moxml" require_relative "model/version" require_relative "model/loggable" require_relative "model/type" diff --git a/lib/lutaml/model/config.rb b/lib/lutaml/model/config.rb index 23deca61..0465e1ae 100644 --- a/lib/lutaml/model/config.rb +++ b/lib/lutaml/model/config.rb @@ -59,6 +59,7 @@ def configure cause: nil, ) end + Moxml::Adapter.load(type_name) unless KEY_VALUE_FORMATS.include?(adapter_name) instance_variable_set( :"@#{adapter}", diff --git a/lib/lutaml/model/serialize.rb b/lib/lutaml/model/serialize.rb index dfc45d7d..dac500ea 100644 --- a/lib/lutaml/model/serialize.rb +++ b/lib/lutaml/model/serialize.rb @@ -506,7 +506,7 @@ def apply_xml_mapping(doc, instance, options = {}) attr = attribute_for_rule(rule) value = if rule.raw_mapping? - doc.node.inner_xml + inner_xml_of(doc.node) elsif rule.content_mapping? doc[rule.content_key] elsif val = value_for_rule(doc, rule, options) @@ -643,6 +643,17 @@ def validate_sequence!(element_order) mapping.validate_content!(current_order) end end + + private + + def inner_xml_of(node) + case node + when XmlAdapter::XmlElement + node.inner_xml + else + node.children.map(&:to_xml).join + end + end end attr_accessor :element_order, :schema_location, :encoding diff --git a/lib/lutaml/model/xml_adapter/builder/oga.rb b/lib/lutaml/model/xml_adapter/builder/oga.rb index ad2296b4..175d3055 100644 --- a/lib/lutaml/model/xml_adapter/builder/oga.rb +++ b/lib/lutaml/model/xml_adapter/builder/oga.rb @@ -167,6 +167,8 @@ def respond_to_missing?(method_name, include_private = false) def element_attributes(oga_element, attributes) oga_element.attributes = attributes.map do |name, value| + value = value.uri unless value.is_a?(String) + ::Oga::XML::Attribute.new( name: name, value: value, diff --git a/lib/lutaml/model/xml_adapter/oga_adapter.rb b/lib/lutaml/model/xml_adapter/oga_adapter.rb index 5a7a46ef..b52fa7f2 100644 --- a/lib/lutaml/model/xml_adapter/oga_adapter.rb +++ b/lib/lutaml/model/xml_adapter/oga_adapter.rb @@ -1,4 +1,5 @@ require "oga" +require "moxml/adapter/oga" require_relative "xml_document" require_relative "oga/document" require_relative "oga/element" @@ -8,17 +9,15 @@ module Lutaml module Model module XmlAdapter class OgaAdapter < XmlDocument + TEXT_CLASSES = [Moxml::Text, Moxml::Cdata].freeze + def self.parse(xml, options = {}) - encoding = encoding(xml, options) - xml = xml.encode("UTF-16").encode("UTF-8") if encoding && encoding != "UTF-8" - parsed = ::Oga.parse_xml(xml) - @root = Oga::Element.new(parsed.children.first) - new(@root, encoding) + parsed = Moxml::Adapter::Oga.parse(xml) + new(parsed.root, encoding(xml, options)) end def to_xml(options = {}) builder_options = {} - builder_options[:encoding] = if options.key?(:encoding) options[:encoding] elsif options.key?(:parse_encoding) @@ -27,16 +26,87 @@ def to_xml(options = {}) "UTF-8" end - builder = Builder::Oga.build(builder_options) do |xml| - if @root.is_a?(Oga::Element) - @root.build_xml(xml) + builder = if @root.is_a?(Moxml::Element) + @root + else + Builder::Oga.build(options) do |xml| + build_element(xml, @root, options) + end + end + xml_data = builder.to_xml + options[:declaration] ? declaration(options) + xml_data : xml_data + end + + def attributes_hash(element) + result = Lutaml::Model::MappingHash.new + + element.attributes.each do |attr| + if attr.name == "schemaLocation" + result["__schema_location"] = { + namespace: attr.namespace, + prefix: attr.namespace.prefix, + schema_location: attr.value, + } else - build_element(xml, @root, options) + result[self.class.namespaced_attr_name(attr)] = attr.value end end - xml_data = builder.to_xml - options[:declaration] ? declaration(options) + xml_data : xml_data + result + end + + def self.name_of(element) + case element + when Moxml::Text + "text" + when Moxml::Cdata + "cdata" + else + element.name + end + end + + def self.prefixed_name_of(node) + return name_of(node) if TEXT_CLASSES.include?(node.class) + + [node&.namespace&.prefix, node.name].compact.join(":") + end + + def self.text_of(element) + element.content + end + + def self.namespaced_attr_name(attribute) + attr_ns = attribute.namespace + attr_name = attribute.name + return attr_name unless attr_ns + + prefix = attr_name == "lang" ? attr_ns.prefix : attr_ns.uri + [prefix, attr_name].compact.join(":") + end + + def self.namespaced_name_of(node) + return name_of(node) unless node.respond_to?(:namespace) + + [node&.namespace&.uri, node.name].compact.join(":") + end + + def order + children.map do |child| + type = child.text? ? "Text" : "Element" + Element.new(type, child.unprefixed_name) + end + end + + def self.order_of(element) + element.children.map do |child| + instance_args = if TEXT_CLASSES.include?(child.class) + ["Text", "text"] + else + ["Element", name_of(child)] + end + Element.new(*instance_args) + end end private diff --git a/lib/lutaml/model/xml_adapter/xml_document.rb b/lib/lutaml/model/xml_adapter/xml_document.rb index ade2ee85..13b22c57 100644 --- a/lib/lutaml/model/xml_adapter/xml_document.rb +++ b/lib/lutaml/model/xml_adapter/xml_document.rb @@ -84,22 +84,32 @@ def build_options_for_nested_elements(options = {}) def parse_element(element, klass = nil, format = nil) result = Lutaml::Model::MappingHash.new result.node = element - result.item_order = element.order + result.item_order = self.class.order_of(element) element.children.each do |child| if klass&.<= Serialize - attr = klass.attribute_for_child(child.name, + attr = klass.attribute_for_child(self.class.name_of(child), format) end - next result.assign_or_append_value(child.name, child.text) if child.text? + if child.respond_to?(:text?) && child.text? + result.assign_or_append_value( + self.class.name_of(child), + self.class.text_of(child), + ) + next + end result["elements"] ||= Lutaml::Model::MappingHash.new - result["elements"].assign_or_append_value(child.namespaced_name, parse_element(child, attr&.type || klass, format)) + result["elements"].assign_or_append_value( + self.class.namespaced_name_of(child), + parse_element(child, attr&.type || klass, format), + ) end result["attributes"] = attributes_hash(element) if element.attributes&.any? + result.merge(attributes_hash(element)) result end @@ -388,6 +398,22 @@ def namespace_attributes(xml_mapping) def self.type Utils.snake_case(self).split("/").last.split("_").first end + + def self.order_of(element) + element.order + end + + def self.name_of(element) + element.name + end + + def self.text_of(element) + element.text + end + + def self.namespaced_name_of(element) + element.namespaced_name + end end end end diff --git a/lutaml-model.gemspec b/lutaml-model.gemspec index b337dcdd..993e7146 100644 --- a/lutaml-model.gemspec +++ b/lutaml-model.gemspec @@ -30,6 +30,7 @@ Gem::Specification.new do |spec| end spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.add_dependency "moxml" spec.add_dependency "thor" spec.metadata["rubygems_mfa_required"] = "true" end diff --git a/spec/lutaml/model/mixed_content_spec.rb b/spec/lutaml/model/mixed_content_spec.rb index fa7a0f43..ae132757 100644 --- a/spec/lutaml/model/mixed_content_spec.rb +++ b/spec/lutaml/model/mixed_content_spec.rb @@ -722,10 +722,10 @@ class HexCode < Lutaml::Model::Serializable it "deserializes SHIFT encoded content correctly with explicit encoding option" do parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS") - expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter - "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") - else + expected_content = if adapter_class == Lutaml::Model::XmlAdapter::NokogiriAdapter "手書き英字1" + else + "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") end expect(parsed.field).to include(expected_content) @@ -734,10 +734,10 @@ class HexCode < Lutaml::Model::Serializable it "deserializes SHIFT encoded content incorrectly without explicit encoding option" do parsed = MixedContentSpec::Shift.from_xml(fixture) - expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter - "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") - else + expected_content = if adapter_class == Lutaml::Model::XmlAdapter::NokogiriAdapter "手書き英字1" + else + "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") end expect(parsed.encoding).to eq("Shift_JIS") @@ -757,10 +757,10 @@ class HexCode < Lutaml::Model::Serializable parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS") serialized = parsed.to_xml(encoding: "UTF-8") - parsed_xml = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter - "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") - else + parsed_xml = if adapter_class == Lutaml::Model::XmlAdapter::NokogiriAdapter "手書き英字1" + else + "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") end expect(parsed.field).to include(parsed_xml) @@ -774,10 +774,10 @@ class HexCode < Lutaml::Model::Serializable parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS") serialized = parsed.to_xml(encoding: "Shift_JIS") - expected_xml = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter - "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") - else + expected_xml = if adapter_class == Lutaml::Model::XmlAdapter::NokogiriAdapter "手書き英字1" + else + "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") end expect(parsed.field).to include(expected_xml) @@ -840,10 +840,10 @@ class HexCode < Lutaml::Model::Serializable it "deserializes latin encoded content correctly" do parsed = MixedContentSpec::Latin.from_xml(fixture, encoding: "ISO-8859-1") - expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter - ["M\xFCller".force_encoding("ISO-8859-1"), "Jos\xE9".force_encoding("ISO-8859-1")] - else + expected_content = if adapter_class == Lutaml::Model::XmlAdapter::NokogiriAdapter ["Müller", "José"] + else + ["M\xFCller".force_encoding("ISO-8859-1"), "Jos\xE9".force_encoding("ISO-8859-1")] end expect(parsed.encoding).to eq("ISO-8859-1") @@ -854,10 +854,10 @@ class HexCode < Lutaml::Model::Serializable it "deserializes latin encoded content correctly, bcz xml.encoding used for parsing" do parsed = MixedContentSpec::Latin.from_xml(fixture) - expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter - ["M\xFCller".force_encoding("ISO-8859-1"), "Jos\xE9".force_encoding("ISO-8859-1")] - else + expected_content = if adapter_class == Lutaml::Model::XmlAdapter::NokogiriAdapter ["Müller", "José"] + else + ["M\xFCller".force_encoding("ISO-8859-1"), "Jos\xE9".force_encoding("ISO-8859-1")] end expect(parsed.encoding).to eq("ISO-8859-1") diff --git a/spec/lutaml/model/xml_adapter/oga_adapter_spec.rb b/spec/lutaml/model/xml_adapter/oga_adapter_spec.rb index ac951de5..997c0c18 100644 --- a/spec/lutaml/model/xml_adapter/oga_adapter_spec.rb +++ b/spec/lutaml/model/xml_adapter/oga_adapter_spec.rb @@ -14,7 +14,7 @@ let(:document) { described_class.parse(xml_string) } context "parsing XML with namespaces" do - let(:child) { document.root.children[1] } + let(:child) { document.root.children.first } it "parses the root element with default namespace" do expect(document.root.name).to eq("root") @@ -23,34 +23,38 @@ end it "parses child element with prefixed namespace" do - expect(child.name).to eq("prefix:child") + expect(described_class.prefixed_name_of(child)).to eq("prefix:child") expect(child.namespace.uri).to eq("http://example.com/prefixed") expect(child.namespace.prefix).to eq("prefix") end it "parses attributes with and without namespaces" do - expect(child.attributes["attr"].value).to eq("value") - expect(child.attributes["attr"].namespace).to be_nil - expect(child.attributes["prefix:attr"].value).to eq("prefixed_value") - expect(child.attributes["prefix:attr"].namespace).to eq("http://example.com/prefixed") - expect(child.attributes["prefix:attr"].namespace_prefix).to eq("prefix") + prefixed_attr = child.attributes.find { |attr| attr&.namespace&.prefix == "prefix" && attr.name == "attr" } + no_prefixed_attr = child.attributes.find { |attr| attr.name == "attr" && attr.namespace.nil? } + expect(no_prefixed_attr.value).to eq("value") + expect(no_prefixed_attr.namespace).to be_nil + expect(prefixed_attr.value).to eq("prefixed_value") + expect(prefixed_attr.namespace.uri).to eq("http://example.com/prefixed") + expect(prefixed_attr.namespace.prefix).to eq("prefix") end end context "generating XML with namespaces" do it "generates XML with namespaces correctly" do - xml_output = document.to_xml - parsed_output = Oga.parse_xml(xml_output) + xml_output = document.root.to_xml + parsed_output = Moxml::Adapter::Oga.parse(xml_output) root = parsed_output.children.first expect(root.name).to eq("root") expect(root.namespace.uri).to eq("http://example.com/default") - child = root.children[1] - expect(child.expanded_name).to eq("prefix:child") + child = root.children.first + expect(described_class.prefixed_name_of(child)).to eq("prefix:child") expect(child.namespace.uri).to eq("http://example.com/prefixed") - expect(child.get("attr")).to eq("value") - expect(child.get("prefix:attr")).to eq("prefixed_value") + unprefixed_attr = child.attributes.find { |attr| attr.name == "attr" } + expect(unprefixed_attr.value).to eq("value") + prefixed_attr = child.attributes.find { |attr| described_class.prefixed_name_of(attr) == "prefix:attr" } + expect(prefixed_attr.value).to eq("prefixed_value") end end end diff --git a/spec/lutaml/model/xml_mapping_spec.rb b/spec/lutaml/model/xml_mapping_spec.rb index 04eddca1..951f8a14 100644 --- a/spec/lutaml/model/xml_mapping_spec.rb +++ b/spec/lutaml/model/xml_mapping_spec.rb @@ -344,6 +344,7 @@ class Schema < Lutaml::Model::Serializable it "checks the attribute with and without namespace" do parsed = XmlMapping::AttributeNamespace.from_xml(input_xml) + expect(parsed.alpha).to eq("hello") expect(parsed.beta).to eq("bye") expect(parsed.to_xml).to be_equivalent_to(input_xml) @@ -382,12 +383,12 @@ class Schema < Lutaml::Model::Serializable end let(:oga_expected_xml) do - "" + - "" + - "GML App" + - "CityGML App" + - "App" + - "" + + "" \ + "" \ + "GML App" \ + "CityGML App" \ + "App" \ + "" \ "" end @@ -477,26 +478,26 @@ class Schema < Lutaml::Model::Serializable end let(:expected_order) do - nokogiri_oga_pattern = create_pattern_mapping([ - ["Text", "text"], - ["Element", "ApplicationSchema"], - ["Text", "text"], - ["Element", "ApplicationSchema"], - ["Text", "text"], - ["Element", "ApplicationSchema"], - ["Text", "text"], - ]) - - ox_pattern = create_pattern_mapping([ - ["Element", "ApplicationSchema"], - ["Element", "ApplicationSchema"], - ["Element", "ApplicationSchema"], - ]) + nokogiri_pattern = create_pattern_mapping([ + ["Text", "text"], + ["Element", "ApplicationSchema"], + ["Text", "text"], + ["Element", "ApplicationSchema"], + ["Text", "text"], + ["Element", "ApplicationSchema"], + ["Text", "text"], + ]) + + oga_ox_pattern = create_pattern_mapping([ + ["Element", "ApplicationSchema"], + ["Element", "ApplicationSchema"], + ["Element", "ApplicationSchema"], + ]) { - Lutaml::Model::XmlAdapter::NokogiriAdapter => nokogiri_oga_pattern, - Lutaml::Model::XmlAdapter::OxAdapter => ox_pattern, - Lutaml::Model::XmlAdapter::OgaAdapter => nokogiri_oga_pattern, + Lutaml::Model::XmlAdapter::NokogiriAdapter => nokogiri_pattern, + Lutaml::Model::XmlAdapter::OxAdapter => oga_ox_pattern, + Lutaml::Model::XmlAdapter::OgaAdapter => oga_ox_pattern, } end @@ -837,18 +838,14 @@ def create_pattern_mapping(array) XML end - let(:expected_street) do - if Lutaml::Model::Config.xml_adapter == Lutaml::Model::XmlAdapter::OxAdapter - "N\n

adf

\n" - else - "\n N\n

adf

\n " - end - end + let(:expected_nokogiri_street) { "\n N\n

adf

\n " } + let(:expected_oga_street) { "N

adf

" } + let(:expected_ox_street) { "N\n

adf

\n" } let(:model) { XmlMapping::Person.from_xml(input_xml) } it "expect to contain raw xml" do - expect(model.address.street).to eq(expected_street) + expect(model.address.street).to eq(send(:"expected_#{adapter_class.type}_street")) expect(model.address.city.strip).to eq("M") end end @@ -1064,7 +1061,11 @@ def create_pattern_mapping(array) end it "maps all the content including tags" do - inner_xml = "Str2text1123" + inner_xml = if adapter_class.type == "ox" + "Str2 text1 123" + else + "Str2text1123" + end xml = "#{inner_xml}" parsed = XmlMapping::WithMapAll.from_xml(xml) @@ -1157,6 +1158,16 @@ def create_pattern_mapping(array) XML end + let(:expected_oga_xml) do + <<~XML.strip + + B

R&C

+ C

J—C

+ O

A & B

+ F

Z ©

+ XML + end + let(:expected_ox_xml) do " " \ "B

R&C

" \ @@ -1167,8 +1178,7 @@ def create_pattern_mapping(array) end it "round-trips xml" do - expected_xml = adapter_class.type == "ox" ? expected_ox_xml : expected_nokogiri_xml - expect(XmlMapping::SpecialCharContentWithMapAll.from_xml(xml).to_xml).to eq(expected_xml) + expect(XmlMapping::SpecialCharContentWithMapAll.from_xml(xml).to_xml).to eq(send(:"expected_#{adapter_class.type}_xml")) end end