From addbdb3547458dd8a34bd67ba2f124582f85d111 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 22 Jan 2024 11:45:14 +0100 Subject: [PATCH 01/10] Inject an environment element into the input in IngestReader --- etc/ingest.xslt | 2 ++ src/icat/ingest.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/etc/ingest.xslt b/etc/ingest.xslt index 6e1e5cee..2e8df084 100644 --- a/etc/ingest.xslt +++ b/etc/ingest.xslt @@ -12,6 +12,8 @@ + + diff --git a/src/icat/ingest.py b/src/icat/ingest.py index 5d69b963..47b00a2a 100644 --- a/src/icat/ingest.py +++ b/src/icat/ingest.py @@ -106,6 +106,9 @@ def __init__(self, client, metadata, investigation): schema = etree.XMLSchema(etree.parse(f)) if not schema.validate(ingest_data): raise InvalidIngestFileError("validation failed") + env = self.get_environment(client) + env_elem = etree.Element("_environment", **env) + ingest_data.getroot().insert(1, env_elem) with self.get_xslt(ingest_data).open("rb") as f: xslt = etree.XSLT(etree.parse(f)) super().__init__(client, xslt(ingest_data)) @@ -176,6 +179,17 @@ def get_xslt(self, ingest_data): raise InvalidIngestFileError("unknown format") return self.SchemaDir / xslt + def get_environment(self, client): + """Get the environment to be injected as an element into the input. + + :param client: the client object being used by this + IngestReader. + :type client: :class:`icat.client.Client` + :return: the environment. + :rtype: :class:`dict` + """ + return dict(icat_version=str(client.apiversion)) + def getobjs_from_data(self, data, objindex): typed_objindex = set() for key, obj in super().getobjs_from_data(data, objindex): From 56e5bb2ffb76ddcb1e468bfb538780acbf782f40 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 22 Jan 2024 14:37:56 +0100 Subject: [PATCH 02/10] Add a test for accessing the environment added in IngestReader with XSLT --- MANIFEST.in | 1 + tests/data/ingest-env.xslt | 59 ++++++++++++++++++++++++++++++++++++++ tests/test_06_ingest.py | 35 ++++++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 tests/data/ingest-env.xslt diff --git a/MANIFEST.in b/MANIFEST.in index a7c92f8b..1a57e4e0 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -16,6 +16,7 @@ include doc/tutorial/*.py include etc/ingest-*.xsd include etc/ingest.xslt include tests/conftest.py +include tests/data/ingest-env.xslt include tests/data/legacy-icatdump-*.xml include tests/data/legacy-icatdump-*.yaml include tests/data/metadata-*.xml diff --git a/tests/data/ingest-env.xslt b/tests/data/ingest-env.xslt new file mode 100644 index 00000000..1c60c4bc --- /dev/null +++ b/tests/data/ingest-env.xslt @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + 2024-01-22T14:30:51+01:00 + + + + ingest-env.xslt + + + + + + + + + + + + + + + false + + + + + + + + + + + + + + + + _Investigation + + + + + + + + + diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py index b9c560f3..8806d957 100644 --- a/tests/test_06_ingest.py +++ b/tests/test_06_ingest.py @@ -714,3 +714,38 @@ def test_custom_ingest(client, investigation, samples, schemadir, case): ds = client.assertedSearch(query)[0] for query, res in case.checks[name]: assert client.assertedSearch(query % ds.id)[0] == res + + +env_cases = [ + Case( + data = ["testingest_inl_1", "testingest_inl_2"], + metadata = gettestdata("metadata-4.4-inl.xml"), + schema = gettestdata("icatdata-4.4.xsd"), + checks = {}, + marks = (), + ), +] +@pytest.mark.parametrize("case", [ + pytest.param(c, id=c.metadata.name, marks=c.marks) for c in env_cases +]) +def test_ingest_env(monkeypatch, client, investigation, schemadir, case): + """Test using the _environment element. + + Applying a custom XSLT that extracts an attribute from the + _environment element that is injected by IngestReader into the + input data and puts that values into the head element of the + transformed input. This is to test that adding the _environment + element works and it is in principle possible to make use of the + values in the XSLT. + """ + monkeypatch.setattr(IngestReader, + "XSLT_Map", dict(icatingest="ingest-env.xslt")) + datasets = [] + for name in case.data: + datasets.append(client.new("Dataset", name=name)) + reader = IngestReader(client, case.metadata, investigation) + with case.schema.open("rb") as f: + schema = etree.XMLSchema(etree.parse(f)) + assert schema.validate(reader.infile) + ver = reader.infile.xpath("/icatdata/head/apiversion")[0].text + assert ver == str(client.apiversion) From 0052bc526f30fe52e7960315b85312badef097af Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 22 Jan 2024 15:33:46 +0100 Subject: [PATCH 03/10] Add an assertion the the version element is found before accessing its text value --- tests/test_06_ingest.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py index 8806d957..b27a9afe 100644 --- a/tests/test_06_ingest.py +++ b/tests/test_06_ingest.py @@ -747,5 +747,6 @@ def test_ingest_env(monkeypatch, client, investigation, schemadir, case): with case.schema.open("rb") as f: schema = etree.XMLSchema(etree.parse(f)) assert schema.validate(reader.infile) - ver = reader.infile.xpath("/icatdata/head/apiversion")[0].text - assert ver == str(client.apiversion) + version_elem = reader.infile.xpath("/icatdata/head/apiversion") + assert version_elem + assert version_elem[0].text == str(client.apiversion) From c28521b647429a5afc2a0052c0c4f300ec9fc10e Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 22 Jan 2024 15:52:52 +0100 Subject: [PATCH 04/10] Minor: move the additional _environment element at index 0 --- src/icat/ingest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/icat/ingest.py b/src/icat/ingest.py index 47b00a2a..6297bab6 100644 --- a/src/icat/ingest.py +++ b/src/icat/ingest.py @@ -108,7 +108,7 @@ def __init__(self, client, metadata, investigation): raise InvalidIngestFileError("validation failed") env = self.get_environment(client) env_elem = etree.Element("_environment", **env) - ingest_data.getroot().insert(1, env_elem) + ingest_data.getroot().insert(0, env_elem) with self.get_xslt(ingest_data).open("rb") as f: xslt = etree.XSLT(etree.parse(f)) super().__init__(client, xslt(ingest_data)) From 7894e9e038e7b0197f2d195ab4db5aa155c43139 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 22 Jan 2024 16:38:48 +0100 Subject: [PATCH 05/10] Document the change --- doc/src/ingest.rst | 4 ---- src/icat/ingest.py | 10 ++++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst index 37785613..a9610f30 100644 --- a/doc/src/ingest.rst +++ b/doc/src/ingest.rst @@ -55,10 +55,6 @@ the ``Dataset``. .. versionchanged:: 1.2.0 add version 1.1 of the ingest file format, including references to samples -.. versionchanged:: 1.3.0 - drop class attribute :attr:`~icat.ingest.IngestReader.XSLT_name` in - favour of :attr:`~icat.ingest.IngestReader.XSLT_Map`. - .. autoclass:: icat.ingest.IngestReader :members: :show-inheritance: diff --git a/src/icat/ingest.py b/src/icat/ingest.py index 6297bab6..98dff164 100644 --- a/src/icat/ingest.py +++ b/src/icat/ingest.py @@ -69,6 +69,14 @@ class IngestReader(XMLDumpFileReader): :type investigation: :class:`icat.entity.Entity` :raise icat.exception.InvalidIngestFileError: if the input in metadata is not valid. + + .. versionchanged:: 1.3.0 + drop class attribute :attr:`~icat.ingest.IngestReader.XSLT_name` + in favour of :attr:`~icat.ingest.IngestReader.XSLT_Map`. + + .. versionchanged:: 1.3.0 + inject an element `_environment` as first child of the root + element into the input data. """ SchemaDir = Path("/usr/share/icat") @@ -187,6 +195,8 @@ def get_environment(self, client): :type client: :class:`icat.client.Client` :return: the environment. :rtype: :class:`dict` + + .. versionadded:: 1.3.0 """ return dict(icat_version=str(client.apiversion)) From 76701a1912afb0ff71c3ca1a5459014a47830f3f Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 22 Jan 2024 16:17:59 +0100 Subject: [PATCH 06/10] Move the code to inject the environment element into the input into a separate method of IngestReader, so it can be overridden in subclasses --- src/icat/ingest.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/icat/ingest.py b/src/icat/ingest.py index 98dff164..0b8f2e8f 100644 --- a/src/icat/ingest.py +++ b/src/icat/ingest.py @@ -114,9 +114,7 @@ def __init__(self, client, metadata, investigation): schema = etree.XMLSchema(etree.parse(f)) if not schema.validate(ingest_data): raise InvalidIngestFileError("validation failed") - env = self.get_environment(client) - env_elem = etree.Element("_environment", **env) - ingest_data.getroot().insert(0, env_elem) + self.add_environment(client, ingest_data) with self.get_xslt(ingest_data).open("rb") as f: xslt = etree.XSLT(etree.parse(f)) super().__init__(client, xslt(ingest_data)) @@ -200,6 +198,21 @@ def get_environment(self, client): """ return dict(icat_version=str(client.apiversion)) + def add_environment(self, client, ingest_data): + """Inject environment information into input data. + + :param client: the client object being used by this + IngestReader. + :type client: :class:`icat.client.Client` + :param ingest_data: input data + :type ingest_data: :class:`lxml.etree._ElementTree` + + .. versionadded:: 1.3.0 + """ + env = self.get_environment(client) + env_elem = etree.Element("_environment", **env) + ingest_data.getroot().insert(0, env_elem) + def getobjs_from_data(self, data, objindex): typed_objindex = set() for key, obj in super().getobjs_from_data(data, objindex): From 47ef98facf21ad6b4becb217515411327d66862b Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 22 Jan 2024 17:00:38 +0100 Subject: [PATCH 07/10] - Fix: should filter out environment element in XSLT for test_custom_ingest() - Minor: update the order of the templates in the XSLT according to order of elements in the input --- etc/ingest.xslt | 4 ++-- tests/data/ingest-env.xslt | 4 ++-- tests/data/myingest.xslt | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/etc/ingest.xslt b/etc/ingest.xslt index 2e8df084..ad14d715 100644 --- a/etc/ingest.xslt +++ b/etc/ingest.xslt @@ -10,10 +10,10 @@ - - + + diff --git a/tests/data/ingest-env.xslt b/tests/data/ingest-env.xslt index 1c60c4bc..8e0eb4e7 100644 --- a/tests/data/ingest-env.xslt +++ b/tests/data/ingest-env.xslt @@ -10,6 +10,8 @@ + + 2024-01-22T14:30:51+01:00 @@ -20,8 +22,6 @@ - - diff --git a/tests/data/myingest.xslt b/tests/data/myingest.xslt index 4016a60e..7b7f591b 100644 --- a/tests/data/myingest.xslt +++ b/tests/data/myingest.xslt @@ -10,6 +10,8 @@ + + From fbe060005434ebe1423678a8fc6ead52ca7b4f54 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 30 Jan 2024 11:50:44 +0100 Subject: [PATCH 08/10] test_06_ingest.py: don't use an icatdata schema adapted to the test case, always validate against the schema according to the ICAT server version, we are talking to --- tests/conftest.py | 14 ++++++++++++++ tests/test_06_ingest.py | 25 +++++-------------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 25c01dcb..104901da 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -159,6 +159,20 @@ def require_dumpfile_backend(backend): _skip("need %s backend for icat.dumpfile" % (backend)) +def get_icatdata_schema(): + if icat_version < "4.4": + fname = "icatdata-4.3.xsd" + elif icat_version < "4.7": + fname = "icatdata-4.4.xsd" + elif icat_version < "4.10": + fname = "icatdata-4.7.xsd" + elif icat_version < "5.0": + fname = "icatdata-4.10.xsd" + else: + fname = "icatdata-5.0.xsd" + return gettestdata(fname) + + def get_reference_dumpfile(ext = "yaml"): require_icat_version("4.4.0", "oldest available set of test data") if icat_version < "4.7": diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py index b27a9afe..e0456d67 100644 --- a/tests/test_06_ingest.py +++ b/tests/test_06_ingest.py @@ -11,7 +11,8 @@ import icat.config from icat.ingest import IngestReader from icat.query import Query -from conftest import getConfig, gettestdata, icat_version, testdatadir +from conftest import (getConfig, gettestdata, icat_version, + get_icatdata_schema, testdatadir) def get_test_investigation(client): @@ -80,14 +81,13 @@ class MyIngestReader(IngestReader): cet = datetime.timezone(datetime.timedelta(hours=1)) cest = datetime.timezone(datetime.timedelta(hours=2)) -Case = namedtuple('Case', ['data', 'metadata', 'schema', 'checks', 'marks']) +Case = namedtuple('Case', ['data', 'metadata', 'checks', 'marks']) # Try out different variants for the metadata input file cases = [ Case( data = ["testingest_inl_1", "testingest_inl_2"], metadata = gettestdata("metadata-4.4-inl.xml"), - schema = gettestdata("icatdata-4.4.xsd"), checks = { "testingest_inl_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -127,7 +127,6 @@ class MyIngestReader(IngestReader): Case( data = ["testingest_inl5_1", "testingest_inl5_2"], metadata = gettestdata("metadata-5.0-inl.xml"), - schema = gettestdata("icatdata-5.0.xsd"), checks = { "testingest_inl5_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -186,7 +185,6 @@ class MyIngestReader(IngestReader): Case( data = ["testingest_sep_1", "testingest_sep_2"], metadata = gettestdata("metadata-4.4-sep.xml"), - schema = gettestdata("icatdata-4.4.xsd"), checks = { "testingest_sep_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -226,7 +224,6 @@ class MyIngestReader(IngestReader): Case( data = ["testingest_sep5_1", "testingest_sep5_2"], metadata = gettestdata("metadata-5.0-sep.xml"), - schema = gettestdata("icatdata-5.0.xsd"), checks = { "testingest_sep5_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -286,7 +283,6 @@ class MyIngestReader(IngestReader): data = [ "testingest_sample_1", "testingest_sample_2", "testingest_sample_3", "testingest_sample_4" ], metadata = gettestdata("metadata-sample.xml"), - schema = gettestdata("icatdata-4.4.xsd"), checks = { "testingest_sample_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -357,7 +353,7 @@ def test_ingest_schema(client, investigation, schemadir, case): for name in case.data: datasets.append(client.new("Dataset", name=name)) reader = IngestReader(client, case.metadata, investigation) - with case.schema.open("rb") as f: + with get_icatdata_schema().open("rb") as f: schema = etree.XMLSchema(etree.parse(f)) assert schema.validate(reader.infile) @@ -406,7 +402,6 @@ def test_ingest(client, investigation, samples, schemadir, case): Case( data = ["testingest_io_1"], metadata = io_metadata, - schema = gettestdata("icatdata-4.4.xsd"), checks = { "testingest_io_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -529,28 +524,24 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case): Case( data = [], metadata = invalid_root_metadata, - schema = None, checks = {}, marks = (), ), Case( data = [], metadata = invalid_ver_metadata, - schema = None, checks = {}, marks = (), ), Case( data = ["testingest_err_invalid_ref"], metadata = invalid_ref_metadata, - schema = gettestdata("icatdata-4.4.xsd"), checks = {}, marks = (), ), Case( data = ["testingest_err_invalid_dup"], metadata = invalid_dup_metadata, - schema = gettestdata("icatdata-4.4.xsd"), checks = {}, marks = (), ), @@ -558,7 +549,6 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case): data = ["testingest_err_invalid_dup_id_1", "testingest_err_invalid_dup_id_2"], metadata = invalid_dup_id_metadata, - schema = gettestdata("icatdata-4.4.xsd"), checks = {}, marks = (), ), @@ -614,14 +604,12 @@ def test_ingest_error_invalid(client, investigation, schemadir, case): Case( data = ["testingest_err_search_attr"], metadata = searcherr_attr_metadata, - schema = gettestdata("icatdata-4.4.xsd"), checks = {}, marks = (), ), Case( data = ["testingest_err_search_ref"], metadata = searcherr_ref_metadata, - schema = gettestdata("icatdata-4.4.xsd"), checks = {}, marks = (), ), @@ -642,7 +630,6 @@ def test_ingest_error_searcherr(client, investigation, schemadir, case): Case( data = ["testingest_custom_icatingest_1"], metadata = gettestdata("metadata-custom-icatingest.xml"), - schema = gettestdata("icatdata-4.4.xsd"), checks = { "testingest_custom_icatingest_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -662,7 +649,6 @@ def test_ingest_error_searcherr(client, investigation, schemadir, case): Case( data = ["testingest_custom_myingest_1"], metadata = gettestdata("metadata-custom-myingest.xml"), - schema = gettestdata("icatdata-4.4.xsd"), checks = { "testingest_custom_myingest_1": [ ("SELECT ds.description FROM Dataset ds WHERE ds.id = %d", @@ -720,7 +706,6 @@ def test_custom_ingest(client, investigation, samples, schemadir, case): Case( data = ["testingest_inl_1", "testingest_inl_2"], metadata = gettestdata("metadata-4.4-inl.xml"), - schema = gettestdata("icatdata-4.4.xsd"), checks = {}, marks = (), ), @@ -744,7 +729,7 @@ def test_ingest_env(monkeypatch, client, investigation, schemadir, case): for name in case.data: datasets.append(client.new("Dataset", name=name)) reader = IngestReader(client, case.metadata, investigation) - with case.schema.open("rb") as f: + with get_icatdata_schema().open("rb") as f: schema = etree.XMLSchema(etree.parse(f)) assert schema.validate(reader.infile) version_elem = reader.infile.xpath("/icatdata/head/apiversion") From 340d1ac41d7e52dcb413c0c1271ce720458cb639 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 28 Feb 2024 12:46:40 +0100 Subject: [PATCH 09/10] Document the processing of ingest files in IngestReader and the _environment element that is injected into the metadata during that processing --- doc/src/ingest.rst | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst index 9ab94740..8245e0fd 100644 --- a/doc/src/ingest.rst +++ b/doc/src/ingest.rst @@ -44,6 +44,57 @@ objects read from the input file in ICAT. :show-inheritance: +.. _ingest-process: + +Ingest process +-------------- + +The processing of ingest files during the instantiation of an +:class:`~icat.ingest.IngestReader` object may be summarized with the +following steps: + +1. Read the metadata and parse the :class:`lxml.etree._ElementTree`. + +2. Call :meth:`~icat.ingest.IngestReader.get_xsd` to get the + appropriate XSD file and validate the metadata against that schema. + +3. Inject an ``_environment`` element as first child of the ``data`` + element, see below. + +4. Call :meth:`~icat.ingest.IngestReader.get_xslt` to get the + appropriate XSLT file and transform the metadata into generic ICAT + data XML file format. + +5. Feed the result of the transformation into the parent class + :class:`~icat.dumpfile_xml.XMLDumpFileReader`. + +Once this initialization is done, +:meth:`~icat.ingest.IngestReader.ingest` may be called to read the +individual objects defined in the metadata. + + +.. _ingest-environment: + +The environment element +----------------------- + +During the processing of ingest files, an ``_environment`` element +will be injected as the first child of the ``data`` element. In the +current version of python-icat, this ``_environment`` element has the +following attributes: + + `icat_version` + Version of the ICAT server this client connects to, e.g. the + :attr:`icat.client.Client.apiversion` attribute of the `client` + object being used by this :class:`~icat.ingest.IngestReader`. + +More attributes may be added in future versions. This +``_environment`` element may be used by the XSLT in order to adapt the +result of the transformation to the environment, in particular to +adapt the output to the ICAT schema version it is supposed to conform +to. + + .. _ingest-example: Ingest example From 77314423049c2e95a25d7e922211024406b0bcc2 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 28 Feb 2024 23:08:56 +0100 Subject: [PATCH 10/10] Update changelog --- CHANGES.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index cc10d5fc..5c6cb3ad 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,6 +14,9 @@ New features processing the input in custom versions of :class:`icat.ingest.IngestReader`. ++ `#148`_, `#149`_: Inject an additional element with environment + information into the input data in :class:`icat.ingest.IngestReader`. + + `#146`_, `#147`_: Better error handling in :class:`icat.ingest.IngestReader`. @@ -40,6 +43,8 @@ Bug fixes and minor changes .. _#145: https://github.com/icatproject/python-icat/pull/145 .. _#146: https://github.com/icatproject/python-icat/issues/146 .. _#147: https://github.com/icatproject/python-icat/pull/147 +.. _#148: https://github.com/icatproject/python-icat/issues/148 +.. _#149: https://github.com/icatproject/python-icat/pull/149 .. _changes-1_2_0: