diff --git a/CHANGES.rst b/CHANGES.rst
index cc10d5fc..5c6cb3ad 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -14,6 +14,9 @@ New features
processing the input in custom versions of
:class:`icat.ingest.IngestReader`.
++ `#148`_, `#149`_: Inject an additional element with environment
+ information into the input data in :class:`icat.ingest.IngestReader`.
+
+ `#146`_, `#147`_: Better error handling in
:class:`icat.ingest.IngestReader`.
@@ -40,6 +43,8 @@ Bug fixes and minor changes
.. _#145: https://github.com/icatproject/python-icat/pull/145
.. _#146: https://github.com/icatproject/python-icat/issues/146
.. _#147: https://github.com/icatproject/python-icat/pull/147
+.. _#148: https://github.com/icatproject/python-icat/issues/148
+.. _#149: https://github.com/icatproject/python-icat/pull/149
.. _changes-1_2_0:
diff --git a/MANIFEST.in b/MANIFEST.in
index 655665c1..1e4d72c2 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -17,6 +17,7 @@ include doc/tutorial/*.py
include etc/ingest-*.xsd
include etc/ingest.xslt
include tests/conftest.py
+include tests/data/ingest-env.xslt
include tests/data/legacy-icatdump-*.xml
include tests/data/legacy-icatdump-*.yaml
include tests/data/metadata-*.xml
diff --git a/doc/src/ingest.rst b/doc/src/ingest.rst
index 9ab94740..8245e0fd 100644
--- a/doc/src/ingest.rst
+++ b/doc/src/ingest.rst
@@ -44,6 +44,57 @@ objects read from the input file in ICAT.
:show-inheritance:
+.. _ingest-process:
+
+Ingest process
+--------------
+
+The processing of ingest files during the instantiation of an
+:class:`~icat.ingest.IngestReader` object may be summarized with the
+following steps:
+
+1. Read the metadata and parse the :class:`lxml.etree._ElementTree`.
+
+2. Call :meth:`~icat.ingest.IngestReader.get_xsd` to get the
+ appropriate XSD file and validate the metadata against that schema.
+
+3. Inject an ``_environment`` element as first child of the ``data``
+ element, see below.
+
+4. Call :meth:`~icat.ingest.IngestReader.get_xslt` to get the
+ appropriate XSLT file and transform the metadata into generic ICAT
+ data XML file format.
+
+5. Feed the result of the transformation into the parent class
+ :class:`~icat.dumpfile_xml.XMLDumpFileReader`.
+
+Once this initialization is done,
+:meth:`~icat.ingest.IngestReader.ingest` may be called to read the
+individual objects defined in the metadata.
+
+
+.. _ingest-environment:
+
+The environment element
+-----------------------
+
+During the processing of ingest files, an ``_environment`` element
+will be injected as the first child of the ``data`` element. In the
+current version of python-icat, this ``_environment`` element has the
+following attributes:
+
+ `icat_version`
+ Version of the ICAT server this client connects to, e.g. the
+ :attr:`icat.client.Client.apiversion` attribute of the `client`
+ object being used by this :class:`~icat.ingest.IngestReader`.
+
+More attributes may be added in future versions. This
+``_environment`` element may be used by the XSLT in order to adapt the
+result of the transformation to the environment, in particular to
+adapt the output to the ICAT schema version it is supposed to conform
+to.
+
+
.. _ingest-example:
Ingest example
diff --git a/etc/ingest.xslt b/etc/ingest.xslt
index 6e1e5cee..ad14d715 100644
--- a/etc/ingest.xslt
+++ b/etc/ingest.xslt
@@ -10,6 +10,8 @@
+
+
diff --git a/src/icat/ingest.py b/src/icat/ingest.py
index 8b5c7fdb..0b8f2e8f 100644
--- a/src/icat/ingest.py
+++ b/src/icat/ingest.py
@@ -73,6 +73,10 @@ class IngestReader(XMLDumpFileReader):
.. versionchanged:: 1.3.0
drop class attribute :attr:`~icat.ingest.IngestReader.XSLT_name`
in favour of :attr:`~icat.ingest.IngestReader.XSLT_Map`.
+
+ .. versionchanged:: 1.3.0
+ inject an element `_environment` as first child of the root
+ element into the input data.
"""
SchemaDir = Path("/usr/share/icat")
@@ -110,6 +114,7 @@ def __init__(self, client, metadata, investigation):
schema = etree.XMLSchema(etree.parse(f))
if not schema.validate(ingest_data):
raise InvalidIngestFileError("validation failed")
+ self.add_environment(client, ingest_data)
with self.get_xslt(ingest_data).open("rb") as f:
xslt = etree.XSLT(etree.parse(f))
super().__init__(client, xslt(ingest_data))
@@ -180,6 +185,34 @@ def get_xslt(self, ingest_data):
raise InvalidIngestFileError("unknown format")
return self.SchemaDir / xslt
+ def get_environment(self, client):
+ """Get the environment to be injected as an element into the input.
+
+ :param client: the client object being used by this
+ IngestReader.
+ :type client: :class:`icat.client.Client`
+ :return: the environment.
+ :rtype: :class:`dict`
+
+ .. versionadded:: 1.3.0
+ """
+ return dict(icat_version=str(client.apiversion))
+
+ def add_environment(self, client, ingest_data):
+ """Inject environment information into input data.
+
+ :param client: the client object being used by this
+ IngestReader.
+ :type client: :class:`icat.client.Client`
+ :param ingest_data: input data
+ :type ingest_data: :class:`lxml.etree._ElementTree`
+
+ .. versionadded:: 1.3.0
+ """
+ env = self.get_environment(client)
+ env_elem = etree.Element("_environment", **env)
+ ingest_data.getroot().insert(0, env_elem)
+
def getobjs_from_data(self, data, objindex):
typed_objindex = set()
for key, obj in super().getobjs_from_data(data, objindex):
diff --git a/tests/conftest.py b/tests/conftest.py
index 25c01dcb..104901da 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -159,6 +159,20 @@ def require_dumpfile_backend(backend):
_skip("need %s backend for icat.dumpfile" % (backend))
+def get_icatdata_schema():
+ if icat_version < "4.4":
+ fname = "icatdata-4.3.xsd"
+ elif icat_version < "4.7":
+ fname = "icatdata-4.4.xsd"
+ elif icat_version < "4.10":
+ fname = "icatdata-4.7.xsd"
+ elif icat_version < "5.0":
+ fname = "icatdata-4.10.xsd"
+ else:
+ fname = "icatdata-5.0.xsd"
+ return gettestdata(fname)
+
+
def get_reference_dumpfile(ext = "yaml"):
require_icat_version("4.4.0", "oldest available set of test data")
if icat_version < "4.7":
diff --git a/tests/data/ingest-env.xslt b/tests/data/ingest-env.xslt
new file mode 100644
index 00000000..8e0eb4e7
--- /dev/null
+++ b/tests/data/ingest-env.xslt
@@ -0,0 +1,59 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 2024-01-22T14:30:51+01:00
+
+
+
+ ingest-env.xslt
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ _Investigation
+
+
+
+
+
+
+
+
+
diff --git a/tests/data/myingest.xslt b/tests/data/myingest.xslt
index 4016a60e..7b7f591b 100644
--- a/tests/data/myingest.xslt
+++ b/tests/data/myingest.xslt
@@ -10,6 +10,8 @@
+
+
diff --git a/tests/test_06_ingest.py b/tests/test_06_ingest.py
index b9c560f3..e0456d67 100644
--- a/tests/test_06_ingest.py
+++ b/tests/test_06_ingest.py
@@ -11,7 +11,8 @@
import icat.config
from icat.ingest import IngestReader
from icat.query import Query
-from conftest import getConfig, gettestdata, icat_version, testdatadir
+from conftest import (getConfig, gettestdata, icat_version,
+ get_icatdata_schema, testdatadir)
def get_test_investigation(client):
@@ -80,14 +81,13 @@ class MyIngestReader(IngestReader):
cet = datetime.timezone(datetime.timedelta(hours=1))
cest = datetime.timezone(datetime.timedelta(hours=2))
-Case = namedtuple('Case', ['data', 'metadata', 'schema', 'checks', 'marks'])
+Case = namedtuple('Case', ['data', 'metadata', 'checks', 'marks'])
# Try out different variants for the metadata input file
cases = [
Case(
data = ["testingest_inl_1", "testingest_inl_2"],
metadata = gettestdata("metadata-4.4-inl.xml"),
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {
"testingest_inl_1": [
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -127,7 +127,6 @@ class MyIngestReader(IngestReader):
Case(
data = ["testingest_inl5_1", "testingest_inl5_2"],
metadata = gettestdata("metadata-5.0-inl.xml"),
- schema = gettestdata("icatdata-5.0.xsd"),
checks = {
"testingest_inl5_1": [
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -186,7 +185,6 @@ class MyIngestReader(IngestReader):
Case(
data = ["testingest_sep_1", "testingest_sep_2"],
metadata = gettestdata("metadata-4.4-sep.xml"),
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {
"testingest_sep_1": [
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -226,7 +224,6 @@ class MyIngestReader(IngestReader):
Case(
data = ["testingest_sep5_1", "testingest_sep5_2"],
metadata = gettestdata("metadata-5.0-sep.xml"),
- schema = gettestdata("icatdata-5.0.xsd"),
checks = {
"testingest_sep5_1": [
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -286,7 +283,6 @@ class MyIngestReader(IngestReader):
data = [ "testingest_sample_1", "testingest_sample_2",
"testingest_sample_3", "testingest_sample_4" ],
metadata = gettestdata("metadata-sample.xml"),
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {
"testingest_sample_1": [
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -357,7 +353,7 @@ def test_ingest_schema(client, investigation, schemadir, case):
for name in case.data:
datasets.append(client.new("Dataset", name=name))
reader = IngestReader(client, case.metadata, investigation)
- with case.schema.open("rb") as f:
+ with get_icatdata_schema().open("rb") as f:
schema = etree.XMLSchema(etree.parse(f))
assert schema.validate(reader.infile)
@@ -406,7 +402,6 @@ def test_ingest(client, investigation, samples, schemadir, case):
Case(
data = ["testingest_io_1"],
metadata = io_metadata,
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {
"testingest_io_1": [
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -529,28 +524,24 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
Case(
data = [],
metadata = invalid_root_metadata,
- schema = None,
checks = {},
marks = (),
),
Case(
data = [],
metadata = invalid_ver_metadata,
- schema = None,
checks = {},
marks = (),
),
Case(
data = ["testingest_err_invalid_ref"],
metadata = invalid_ref_metadata,
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {},
marks = (),
),
Case(
data = ["testingest_err_invalid_dup"],
metadata = invalid_dup_metadata,
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {},
marks = (),
),
@@ -558,7 +549,6 @@ def test_ingest_fileobj(client, investigation, samples, schemadir, case):
data = ["testingest_err_invalid_dup_id_1",
"testingest_err_invalid_dup_id_2"],
metadata = invalid_dup_id_metadata,
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {},
marks = (),
),
@@ -614,14 +604,12 @@ def test_ingest_error_invalid(client, investigation, schemadir, case):
Case(
data = ["testingest_err_search_attr"],
metadata = searcherr_attr_metadata,
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {},
marks = (),
),
Case(
data = ["testingest_err_search_ref"],
metadata = searcherr_ref_metadata,
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {},
marks = (),
),
@@ -642,7 +630,6 @@ def test_ingest_error_searcherr(client, investigation, schemadir, case):
Case(
data = ["testingest_custom_icatingest_1"],
metadata = gettestdata("metadata-custom-icatingest.xml"),
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {
"testingest_custom_icatingest_1": [
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -662,7 +649,6 @@ def test_ingest_error_searcherr(client, investigation, schemadir, case):
Case(
data = ["testingest_custom_myingest_1"],
metadata = gettestdata("metadata-custom-myingest.xml"),
- schema = gettestdata("icatdata-4.4.xsd"),
checks = {
"testingest_custom_myingest_1": [
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
@@ -714,3 +700,38 @@ def test_custom_ingest(client, investigation, samples, schemadir, case):
ds = client.assertedSearch(query)[0]
for query, res in case.checks[name]:
assert client.assertedSearch(query % ds.id)[0] == res
+
+
+env_cases = [
+ Case(
+ data = ["testingest_inl_1", "testingest_inl_2"],
+ metadata = gettestdata("metadata-4.4-inl.xml"),
+ checks = {},
+ marks = (),
+ ),
+]
+@pytest.mark.parametrize("case", [
+ pytest.param(c, id=c.metadata.name, marks=c.marks) for c in env_cases
+])
+def test_ingest_env(monkeypatch, client, investigation, schemadir, case):
+ """Test using the _environment element.
+
+ Applying a custom XSLT that extracts an attribute from the
+ _environment element that is injected by IngestReader into the
+ input data and puts that values into the head element of the
+ transformed input. This is to test that adding the _environment
+ element works and it is in principle possible to make use of the
+ values in the XSLT.
+ """
+ monkeypatch.setattr(IngestReader,
+ "XSLT_Map", dict(icatingest="ingest-env.xslt"))
+ datasets = []
+ for name in case.data:
+ datasets.append(client.new("Dataset", name=name))
+ reader = IngestReader(client, case.metadata, investigation)
+ with get_icatdata_schema().open("rb") as f:
+ schema = etree.XMLSchema(etree.parse(f))
+ assert schema.validate(reader.infile)
+ version_elem = reader.infile.xpath("/icatdata/head/apiversion")
+ assert version_elem
+ assert version_elem[0].text == str(client.apiversion)