Commit 647b465: Merge branch 'develop'

RKrahl committed Oct 11, 2024
2 parents 902c427 + d204131
Showing 10 changed files with 433 additions and 116 deletions.
25 changes: 25 additions & 0 deletions CHANGES.rst
@@ -2,6 +2,31 @@ Changelog
 =========
 
 
+.. _changes-1_5_0:
+
+1.5.0 (2024-10-11)
+~~~~~~~~~~~~~~~~~~
+
+New features
+------------
+
++ `#160`_, `#161`_, `#163`_: Add class attributes to
+  :class:`icat.ingest.IngestReader` to make some prescribed values in
+  the transformation to ICAT data file format configurable.
+
+Bug fixes and minor changes
+---------------------------
+
++ `#162`_: Minor updates in the tool chain
++ `#164`_: Fix `dumpinvestigation.py` example script
+
+.. _#160: https://github.com/icatproject/python-icat/issues/160
+.. _#161: https://github.com/icatproject/python-icat/pull/161
+.. _#162: https://github.com/icatproject/python-icat/pull/162
+.. _#163: https://github.com/icatproject/python-icat/pull/163
+.. _#164: https://github.com/icatproject/python-icat/pull/164
+
+
 .. _changes-1_4_0:
 
 1.4.0 (2024-08-30)
264 changes: 163 additions & 101 deletions doc/examples/dumpinvestigation.py
@@ -17,130 +17,192 @@
 
 logging.basicConfig(level=logging.INFO)
 
-formats = icat.dumpfile.Backends.keys()
-config = icat.config.Config()
-config.add_variable('file', ("-o", "--outputfile"),
-                    dict(help="output file name or '-' for stdout"),
-                    default='-')
-config.add_variable('format', ("-f", "--format"),
-                    dict(help="output file format", choices=formats),
-                    default='YAML')
-config.add_variable('investigation', ("investigation",),
-                    dict(help="name and optionally visit id "
-                         "(separated by a colon) of the investigation"))
-client, conf = config.getconfig()
-
-if client.apiversion < '4.4':
-    raise RuntimeError("Sorry, ICAT version %s is too old, need 4.4.0 or newer."
-                       % client.apiversion)
-client.login(conf.auth, conf.credentials)
-
 
 # ------------------------------------------------------------
 # helper
 # ------------------------------------------------------------
 
-def getinvestigation(invid):
+def get_investigation_id(client, invid):
     """Search the investigation id from name and optionally visitid."""
+    query = Query(client, "Investigation", attributes=["id"])
     l = invid.split(':')
-    if len(l) == 1:
-        # No colon, invid == name
-        searchexp = "Investigation.id [name='%s']" % tuple(l)
-    elif len(l) == 2:
+    query.addConditions({"name": "= '%s'" % l[0]})
+    if len(l) == 2:
         # one colon, invid == name:visitId
-        searchexp = "Investigation.id [name='%s' AND visitId='%s']" % tuple(l)
+        query.addConditions({"visitId": "= '%s'" % l[1]})
     else:
         # too many colons
        raise RuntimeError("Invalid investigation identifier '%s'" % invid)
-    return (client.assertedSearch(searchexp)[0])
+    return client.assertedSearch(query)[0]
 
-def mergesearch(sexps):
+def mergesearch(client, queries):
     """Do many searches and merge the results in one list excluding dups."""
     objs = set()
-    for se in sexps:
+    for se in queries:
         objs.update(client.search(se))
     return list(objs)
 
+# The following helper functions control what ICAT objects are written
+# in each of the dumpfile chunks. There are three options for the
+# items in each list: either queries expressed as Query objects, or
+# queries expressed as string expressions, or lists of objects. In
+# the first two cases, the search results will be written, in the last
+# case, the objects are written as provided.
 
+def get_auth_types(client, invid):
+    """Users and groups related to the investigation.
+    """
+    # We need the users related to our investigation via
+    # InvestigationUser, the users member of one of the groups related
+    # via InvestigationGroup, and the instrument scientists from the
+    # instruments related to the investigations. These are
+    # independent searches, but the results are likely to overlap. So
+    # we need to search and merge results first.
+    usersearch = [
+        Query(client, "User", conditions={
+            "investigationUsers."
+            "investigation.id": "= %d" % invid,
+        }),
+        Query(client, "User", conditions={
+            "userGroups.grouping.investigationGroups."
+            "investigation.id": "= %d" % invid,
+        }),
+        Query(client, "User", conditions={
+            "instrumentScientists.instrument.investigationInstruments."
+            "investigation.id": "= %d" % invid,
+        }),
+    ]
+    return [
+        mergesearch(client, usersearch),
+        Query(client, "Grouping", conditions={
+            "investigationGroups.investigation.id": "= %d" % invid,
+        }, includes=["userGroups.user"], aggregate="DISTINCT", order=True),
+    ]
+
+def get_static_types(client, invid):
+    """Static stuff that exists independently of the investigation in ICAT.
+    """
+    # Similar situation for ParameterType as for User: need to merge
+    # ParameterType used for InvestigationParameter, SampleParameter,
+    # DatasetParameter, and DatafileParameter.
+    ptsearch = [
+        Query(client, "ParameterType", conditions={
+            "investigationParameters."
+            "investigation.id": "= %d" % invid,
+        }, includes=["facility", "permissibleStringValues"]),
+        Query(client, "ParameterType", conditions={
+            "sampleParameters.sample."
+            "investigation.id": "= %d" % invid,
+        }, includes=["facility", "permissibleStringValues"]),
+        Query(client, "ParameterType", conditions={
+            "datasetParameters.dataset."
+            "investigation.id": "= %d" % invid,
+        }, includes=["facility", "permissibleStringValues"]),
+        Query(client, "ParameterType", conditions={
+            "datafileParameters.datafile.dataset."
+            "investigation.id": "= %d" % invid,
+        }, includes=["facility", "permissibleStringValues"]),
+    ]
+    return [
+        Query(client, "Facility",
+              conditions={
+                  "investigations.id": "= %d" % invid,
+              },
+              order=True),
+        Query(client, "Instrument",
+              conditions={
+                  "investigationInstruments.investigation.id": "= %d" % invid,
+              },
+              includes=["facility", "instrumentScientists.user"],
+              order=True),
+        mergesearch(client, ptsearch),
+        Query(client, "InvestigationType",
+              conditions={
+                  "investigations.id": "= %d" % invid,
+              },
+              includes=["facility"],
+              order=True),
+        Query(client, "SampleType",
+              conditions={
+                  "samples.investigation.id": "= %d" % invid,
+              },
+              includes=["facility"],
+              aggregate="DISTINCT",
+              order=True),
+        Query(client, "DatasetType",
+              conditions={
+                  "datasets.investigation.id": "= %d" % invid,
+              },
+              includes=["facility"],
+              aggregate="DISTINCT",
+              order=True),
+        Query(client, "DatafileFormat",
+              conditions={
+                  "datafiles.dataset.investigation.id": "= %d" % invid,
+              },
+              includes=["facility"],
+              aggregate="DISTINCT",
+              order=True),
+    ]
+
+def get_investigation_types(client, invid):
+    """The investigation and all the stuff that belongs to it.
+    """
+    # The set of objects to be included in the Investigation.
+    inv_includes = {
+        "facility", "type.facility", "investigationInstruments",
+        "investigationInstruments.instrument.facility", "shifts",
+        "keywords", "publications", "investigationUsers",
+        "investigationUsers.user", "investigationGroups",
+        "investigationGroups.grouping", "parameters",
+        "parameters.type.facility"
+    }
+    return [
+        Query(client, "Investigation",
+              conditions={"id":"in (%d)" % invid},
+              includes=inv_includes),
+        Query(client, "Sample",
+              conditions={"investigation.id":"= %d" % invid},
+              includes={"investigation", "type.facility",
+                        "parameters", "parameters.type.facility"},
+              order=True),
+        Query(client, "Dataset",
+              conditions={"investigation.id":"= %d" % invid},
+              includes={"investigation", "type.facility", "sample",
+                        "parameters", "parameters.type.facility"},
+              order=True),
+        Query(client, "Datafile",
+              conditions={"dataset.investigation.id":"= %d" % invid},
+              includes={"dataset", "datafileFormat.facility",
+                        "parameters", "parameters.type.facility"},
+              order=True)
+    ]
 
 # ------------------------------------------------------------
 # Do it
 # ------------------------------------------------------------
 
-invid = getinvestigation(conf.investigation)
-
+formats = icat.dumpfile.Backends.keys()
+config = icat.config.Config()
+config.add_variable('file', ("-o", "--outputfile"),
+                    dict(help="output file name or '-' for stdout"),
+                    default='-')
+config.add_variable('format', ("-f", "--format"),
+                    dict(help="output file format", choices=formats),
+                    default='YAML')
+config.add_variable('investigation', ("investigation",),
+                    dict(help="name and optionally visit id "
+                         "(separated by a colon) of the investigation"))
+client, conf = config.getconfig()
+
-# We need the users related to our investigation via
-# InvestigationUser, the users member of one of the groups related via
-# InvestigationGroup, and the instrument scientists from the
-# instruments related to the investigations. These are independent
-# searches, but the results are likely to overlap. So we need to
-# search and merge results first. Similar situation for ParameterType.
-usersearch = [("User <-> InvestigationUser <-> Investigation [id=%d]"),
-              ("User <-> UserGroup <-> Grouping <-> InvestigationGroup "
-               "<-> Investigation [id=%d]"),
-              ("User <-> InstrumentScientist <-> Instrument "
-               "<-> InvestigationInstrument <-> Investigation [id=%d]")]
-ptsearch = [("ParameterType INCLUDE Facility, PermissibleStringValue "
-             "<-> InvestigationParameter <-> Investigation [id=%d]"),
-            ("ParameterType INCLUDE Facility, PermissibleStringValue "
-             "<-> SampleParameter <-> Sample <-> Investigation [id=%d]"),
-            ("ParameterType INCLUDE Facility, PermissibleStringValue "
-             "<-> DatasetParameter <-> Dataset <-> Investigation [id=%d]"),
-            ("ParameterType INCLUDE Facility, PermissibleStringValue "
-             "<-> DatafileParameter <-> Datafile <-> Dataset "
-             "<-> Investigation [id=%d]"), ]
+if client.apiversion < '4.4':
+    raise RuntimeError("Sorry, ICAT version %s is too old, need 4.4.0 or newer."
+                       % client.apiversion)
+client.login(conf.auth, conf.credentials)
 
-# The set of objects to be included in the Investigation.
-inv_includes = { "facility", "type.facility", "investigationInstruments",
-                 "investigationInstruments.instrument.facility", "shifts",
-                 "keywords", "publications", "investigationUsers",
-                 "investigationUsers.user", "investigationGroups",
-                 "investigationGroups.grouping", "parameters",
-                 "parameters.type.facility" }
-
-# The following lists control what ICAT objects are written in each of
-# the dumpfile chunks. There are three options for the items in each
-# list: either queries expressed as Query objects, or queries
-# expressed as string expressions, or lists of objects. In the first
-# two cases, the seacrh results will be written, in the last case, the
-# objects are written as provided. We assume that there is only one
-# relevant facility, e.g. that all objects related to the
-# investigation are related to the same facility. We may thus ommit
-# the facility from the ORDER BY clauses.
-authtypes = [mergesearch([s % invid for s in usersearch]),
-             ("Grouping ORDER BY name INCLUDE UserGroup, User "
-              "<-> InvestigationGroup <-> Investigation [id=%d]" % invid)]
-statictypes = [("Facility ORDER BY name"),
-               ("Instrument ORDER BY name "
-                "INCLUDE Facility, InstrumentScientist, User "
-                "<-> InvestigationInstrument <-> Investigation [id=%d]"
-                % invid),
-               (mergesearch([s % invid for s in ptsearch])),
-               ("InvestigationType ORDER BY name INCLUDE Facility "
-                "<-> Investigation [id=%d]" % invid),
-               ("SampleType ORDER BY name, molecularFormula INCLUDE Facility "
-                "<-> Sample <-> Investigation [id=%d]" % invid),
-               ("DatasetType ORDER BY name INCLUDE Facility "
-                "<-> Dataset <-> Investigation [id=%d]" % invid),
-               ("DatafileFormat ORDER BY name, version INCLUDE Facility "
-                "<-> Datafile <-> Dataset <-> Investigation [id=%d]" % invid)]
-investtypes = [Query(client, "Investigation",
-                     conditions={"id":"in (%d)" % invid},
-                     includes=inv_includes),
-               Query(client, "Sample", order=["name"],
-                     conditions={"investigation.id":"= %d" % invid},
-                     includes={"investigation", "type.facility",
-                               "parameters", "parameters.type.facility"}),
-               Query(client, "Dataset", order=["name"],
-                     conditions={"investigation.id":"= %d" % invid},
-                     includes={"investigation", "type.facility", "sample",
-                               "parameters", "parameters.type.facility"}),
-               Query(client, "Datafile", order=["dataset.name", "name"],
-                     conditions={"dataset.investigation.id":"= %d" % invid},
-                     includes={"dataset", "datafileFormat.facility",
-                               "parameters", "parameters.type.facility"})]
+invid = get_investigation_id(client, conf.investigation)
 
 with open_dumpfile(client, conf.file, conf.format, 'w') as dumpfile:
-    dumpfile.writedata(authtypes)
-    dumpfile.writedata(statictypes)
-    dumpfile.writedata(investtypes)
+    dumpfile.writedata(get_auth_types(client, invid))
+    dumpfile.writedata(get_static_types(client, invid))
+    dumpfile.writedata(get_investigation_types(client, invid))
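A note on the refactoring shown above: queries that the old script assembled as printf-style search-expression strings are now built as `icat.query.Query` objects and wrapped in helper functions. A minimal sketch of the equivalence of the two styles, assuming a configured and logged-in `client` and an investigation id `invid` (both placeholders here); the two query forms are taken verbatim from the diff:

```python
from icat.query import Query

# Old style, removed above: a raw search expression string with the
# investigation id interpolated into it.
users_old = client.search(
    "User <-> InvestigationUser <-> Investigation [id=%d]" % invid)

# New style, added above: the same search expressed as a Query object,
# built from the entity type name and a conditions mapping.
users_new = client.search(Query(client, "User", conditions={
    "investigationUsers.investigation.id": "= %d" % invid,
}))

# Both forms should yield the same set of users.
assert set(users_old) == set(users_new)
```

`Client.search()` accepts both forms; the `Query` form avoids hand-assembled search strings, which is also what lets `mergesearch()` now take a list of `Query` objects rather than pre-formatted strings.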
2 changes: 1 addition & 1 deletion doc/examples/ingest.py
@@ -24,7 +24,7 @@
 The script takes the name of an investigation as argument. The
 investigation MUST exist in ICAT beforehand and all datasets in the
 input directory MUST belong to this investigation. The script will
-create tha datasets in ICAT, e.g. they MUST NOT exist in ICAT
+create the datasets in ICAT, e.g. they MUST NOT exist in ICAT
 beforehand. The metadata input file may contain attributes and
 related objects (datasetInstrument, datasetTechnique,
 datasetParameter) for the datasets provided in the input directory.
10 changes: 10 additions & 0 deletions doc/src/conf.py
@@ -6,6 +6,7 @@
 # full list see the documentation:
 # http://www.sphinx-doc.org/en/master/config
 
+import os
 from pathlib import Path
 import sys
 
@@ -135,6 +136,15 @@ def make_meta_rst(last_release):
 #
 # html_theme_options = {}
 
+# Define the canonical URL if you are using a custom domain on Read the Docs
+html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "")
+
+# Tell Jinja2 templates the build is running on Read the Docs
+if os.environ.get("READTHEDOCS", "") == "True":
+    if "html_context" not in globals():
+        html_context = {}
+    html_context["READTHEDOCS"] = True
+
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
8 changes: 8 additions & 0 deletions doc/src/ingest.rst
@@ -23,6 +23,14 @@ format of the input files may be customized to some extent by
 providing custom versions of XSD and XSLT files, see
 :ref:`ingest-customize` below.
 
+Some attributes and relations of the ``Dataset`` objects are
+prescribed during the transformation into ICAT data file format,
+namely the ``complete`` attribute and the name of the ``DatasetType``
+to relate them to.  The prescribed values are set in class attributes
+:attr:`~icat.ingest.IngestReader.Dataset_complete` and
+:attr:`~icat.ingest.IngestReader.DatasetType_name` respectively.  They
+may be customized by overriding these class attributes.
+
 The ``Dataset`` objects in the input will not be created by
 :class:`~icat.ingest.IngestReader`, because it is assumed that a
 separate workflow in the caller will copy the content of datafiles to
12 changes: 10 additions & 2 deletions etc/ingest.xslt
@@ -23,14 +23,22 @@
   <xsl:template match="/icatingest/data/dataset">
     <dataset>
       <xsl:copy-of select="@id"/>
-      <complete>false</complete>
+      <xsl:element name="complete">
+        <xsl:value-of
+          select="/icatingest/_environment/@dataset_complete"/>
+      </xsl:element>
       <xsl:copy-of select="description"/>
       <xsl:copy-of select="endDate"/>
       <xsl:copy-of select="name"/>
       <xsl:copy-of select="startDate"/>
       <investigation ref="_Investigation"/>
       <xsl:apply-templates select="sample"/>
-      <type name="raw"/>
+      <xsl:element name="type">
+        <xsl:attribute name="name">
+          <xsl:value-of
+            select="/icatingest/_environment/@datasettype_name"/>
+        </xsl:attribute>
+      </xsl:element>
       <xsl:copy-of select="datasetInstruments"/>
       <xsl:copy-of select="datasetTechniques"/>
       <xsl:copy-of select="parameters"/>
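The template above no longer hard-codes the prescribed values, but reads them from attributes of an `_environment` element in the input document. A self-contained sketch of that XSLT pattern using lxml (the stylesheet and input below are cut-down stand-ins, not the real `ingest.xslt` or metadata format; the attribute values mirror the defaults `false` and `raw`, and how `IngestReader` injects the `_environment` element is not shown here):

```python
from lxml import etree

# Cut-down stand-in for the template above: read the prescribed values
# from the /icatingest/_environment element instead of hard-coding them.
transform = etree.XSLT(etree.XML("""\
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:template match="/icatingest/data/dataset">
    <dataset>
      <xsl:element name="complete">
        <xsl:value-of select="/icatingest/_environment/@dataset_complete"/>
      </xsl:element>
      <xsl:element name="type">
        <xsl:attribute name="name">
          <xsl:value-of select="/icatingest/_environment/@datasettype_name"/>
        </xsl:attribute>
      </xsl:element>
    </dataset>
  </xsl:template>
</xsl:stylesheet>
"""))

# Hypothetical input with the environment attributes set to the defaults.
doc = etree.XML("""\
<icatingest>
  <_environment dataset_complete="false" datasettype_name="raw"/>
  <data><dataset/></data>
</icatingest>
""")

print(transform(doc))
# prints something like:
# <dataset><complete>false</complete><type name="raw"/></dataset>
```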
(Diffs for the remaining changed files are not shown.)