Skip to content

Commit

Permalink
Merge pull request #49 from SciCatProject/scicat-object-refactor
Browse files Browse the repository at this point in the history
Scicat object schema rendering using jinja template.
  • Loading branch information
nitrosx authored Jul 22, 2024
2 parents 9ce75bb + cd0ffbc commit 448848f
Show file tree
Hide file tree
Showing 10 changed files with 207 additions and 47 deletions.
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include src/scicat_schemas/dataset.schema.json.jinja
include src/scicat_schemas/origdatablock.schema.json.jinja
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dependencies = [
"ess-streaming-data-types",
"graypy",
"h5py",
"jinja2",
"kafka-python",
"requests",
"rich"
Expand Down
47 changes: 0 additions & 47 deletions scicat_schemas/dataset.schema.json

This file was deleted.

49 changes: 49 additions & 0 deletions src/scicat_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
from jinja2 import Template
from scicat_path_helpers import get_dataset_schema_template_path


def _escape_json_string(value: object) -> str:
    """Return ``str(value)`` escaped for use inside a double-quoted JSON string."""
    import json

    # ``json.dumps`` produces a complete quoted JSON string. The template
    # already supplies the surrounding quotes, so only the escaped content
    # between them is kept.
    return json.dumps(str(value), ensure_ascii=False)[1:-1]


def build_dataset_schema(
    *,
    nxs_dataset_pid: str,
    dataset_name: str,
    dataset_description: str,
    principal_investigator: str,
    facility: str,
    environment: str,
    scientific_metadata: str,
    owner: str,
    owner_email: str,
    source_folder: str,
    contact_email: str,
    iso_creation_time: str,
    technique_pid: str,
    technique_name: str,
    instrument_id: str,
    sample_id: str,
    proposal_id: str,
    owner_group: str,
    access_groups: list[str],
) -> str:
    """Render the dataset schema template into a JSON document.

    All arguments are keyword-only and are forwarded to the Jinja template
    under the same names.

    Every argument except ``scientific_metadata`` is placed by the template
    between double quotes, so it is JSON-escaped here first. Without the
    escaping, a value containing a double quote, a backslash or a newline
    would produce invalid JSON.

    ``scientific_metadata`` is passed through unchanged: the template inserts
    it between the braces of the ``scientificMetadata`` object, so it must
    already be a serialized sequence of JSON object members.

    ``access_groups`` is a list; each entry is escaped individually and
    rendered as one element of the ``accessGroups`` array.

    Returns
    -------
    str
        The rendered template text.
    """
    template = Template(
        get_dataset_schema_template_path().read_text(encoding="utf-8")
    )
    return template.render(
        nxs_dataset_pid=_escape_json_string(nxs_dataset_pid),
        dataset_name=_escape_json_string(dataset_name),
        dataset_description=_escape_json_string(dataset_description),
        principal_investigator=_escape_json_string(principal_investigator),
        facility=_escape_json_string(facility),
        environment=_escape_json_string(environment),
        # Raw JSON fragment: deliberately not escaped.
        scientific_metadata=scientific_metadata,
        owner=_escape_json_string(owner),
        owner_email=_escape_json_string(owner_email),
        source_folder=_escape_json_string(source_folder),
        contact_email=_escape_json_string(contact_email),
        iso_creation_time=_escape_json_string(iso_creation_time),
        technique_pid=_escape_json_string(technique_pid),
        technique_name=_escape_json_string(technique_name),
        instrument_id=_escape_json_string(instrument_id),
        sample_id=_escape_json_string(sample_id),
        proposal_id=_escape_json_string(proposal_id),
        owner_group=_escape_json_string(owner_group),
        access_groups=[_escape_json_string(group) for group in access_groups],
    )
14 changes: 14 additions & 0 deletions src/scicat_path_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,17 @@ def select_target_directory(
return file_path.parent / pathlib.Path(fh_options.ingestor_files_directory)
else:
return pathlib.Path(fh_options.local_output_directory)


def get_dataset_schema_template_path() -> pathlib.Path:
    """Return the location of the dataset schema Jinja template.

    The path points into the ``scicat_schemas`` directory that sits next to
    this module.
    """
    module_directory = pathlib.Path(__file__).parent
    return module_directory / "scicat_schemas" / "dataset.schema.json.jinja"


def get_origdatablock_schema_template_path() -> pathlib.Path:
    """Get the path to the origdatablock schema template.

    The path points into the ``scicat_schemas`` directory that sits next to
    this module.
    """
    return pathlib.Path(__file__).parent / pathlib.Path(
        "scicat_schemas/origdatablock.schema.json.jinja"
    )
Empty file added src/scicat_schemas/__init__.py
Empty file.
30 changes: 30 additions & 0 deletions src/scicat_schemas/dataset.schema.json.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{#
  Dataset schema template; renders to a single JSON object.

  Every variable except the two below is inserted verbatim between double
  quotes, so the supplied text must be safe inside a JSON string.

  scientific_metadata: inserted verbatim between the braces of the
      "scientificMetadata" object, so it must already be a serialized
      sequence of JSON object members.
  access_groups: iterable; each entry is emitted as a quoted array element,
      with a comma after every entry except the last.

  The trailing "-" on this comment strips the newline that follows it, so
  the comment adds nothing to the rendered text.
-#}
{
"pid": "{{ nxs_dataset_pid }}",
"datasetName": "{{ dataset_name }}",
"description": "{{ dataset_description }}",
"principalInvestigator": "{{ principal_investigator }}",
"creationLocation": "{{ facility }}:{{ environment }}",
"scientificMetadata": {
{{ scientific_metadata }}
},
"owner": "{{ owner }}",
"ownerEmail": "{{ owner_email }}",
"sourceFolder": "{{ source_folder }}",
"contactEmail": "{{ contact_email }}",
"creationTime": "{{ iso_creation_time }}",
"type": "raw",
"techniques": [
{
"pid": "{{ technique_pid }}",
"names": "{{ technique_name }}"
}
],
"instrumentId": "{{ instrument_id }}",
"sampleId": "{{ sample_id }}",
"proposalId": "{{ proposal_id }}",
"ownerGroup": "{{ owner_group }}",
"accessGroups": [
{% for access_group in access_groups %}"{{ access_group }}"{% if not loop.last %},
{% endif %}{% endfor %}
]
}
File renamed without changes.
18 changes: 18 additions & 0 deletions tests/test_scicat_path_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)


def test_dataset_schema_path_helper() -> None:
    """The dataset template path has the expected file name and exists."""
    from scicat_path_helpers import get_dataset_schema_template_path

    template_path = get_dataset_schema_template_path()
    assert template_path.name == "dataset.schema.json.jinja"
    assert template_path.exists()


def test_origdatablock_schema_path_helper() -> None:
    """The origdatablock template path has the expected file name and exists."""
    from scicat_path_helpers import get_origdatablock_schema_template_path

    template_path = get_origdatablock_schema_template_path()
    assert template_path.name == "origdatablock.schema.json.jinja"
    assert template_path.exists()
93 changes: 93 additions & 0 deletions tests/test_scicat_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)


# Serialized members of a JSON object, without the enclosing braces. The test
# below passes this text as ``scientific_metadata`` to ``build_dataset_schema``,
# and it is also spliced into the expected document defined next.
_example_scientific_metadata = """"run_number": {
"value": 18856,
"unit": "",
"human_name": "Run Number",
"type": "integer"
},
"sample_temperature": {
"value": 20.4,
"unit": "C",
"human_name": "Sample Temperature",
"type": "quantity"
},
"start_time" : {
"value" : "2024-07-16T09:30:12.987Z",
"unit" : "",
"human_name" : "Start Time",
"type" : "date"
}"""

# Reference JSON document for the rendering test. It is assembled from three
# pieces so that the scientific metadata fragment above is embedded verbatim
# inside the "scientificMetadata" object.
_example_dataset_schema = (
"""
{
"pid": "12.234.34567/e3690b21-ee8c-40d6-9409-6b6fdca776d2",
"datasetName": "this is a dataset",
"description": "this is the description of the dataset",
"principalInvestigator": "Somebodys Name",
"creationLocation": "ESS:CODA",
"scientificMetadata": {
"""
+ _example_scientific_metadata
+ """
},
"owner": "Somebodys Name",
"ownerEmail": "someones_@_email",
"sourceFolder": "/ess/data/coda/2024/616254",
"contactEmail": "someones_@_email",
"creationTime": "2024-07-16T10:00:00.000Z",
"type": "raw",
"techniques": [
{
"pid": "someprotocol://someones/url/and/id",
"names": "absorption and phase contrast nanotomography"
}
],
"instrumentId": "12.234.34567/765b3dc3-f658-410e-b371-04dd1adcd520",
"sampleId": "bd31725a-dbfd-4c32-87db-1c1ebe61e5ca",
"proposalId": "616254",
"ownerGroup": "ess_proposal_616254",
"accessGroups": [
"scientific information management systems group",
"scicat group"
]
}
"""
)


def test_dataset_schema_rendering() -> None:
    """The rendered dataset schema parses to the same JSON as the reference."""
    import json

    from scicat_dataset import build_dataset_schema

    render_arguments = {
        "nxs_dataset_pid": "12.234.34567/e3690b21-ee8c-40d6-9409-6b6fdca776d2",
        "dataset_name": "this is a dataset",
        "dataset_description": "this is the description of the dataset",
        "principal_investigator": "Somebodys Name",
        "facility": "ESS",
        "environment": "CODA",
        "scientific_metadata": _example_scientific_metadata,
        "owner": "Somebodys Name",
        "owner_email": "someones_@_email",
        "source_folder": "/ess/data/coda/2024/616254",
        "contact_email": "someones_@_email",
        "iso_creation_time": "2024-07-16T10:00:00.000Z",
        "technique_pid": "someprotocol://someones/url/and/id",
        "technique_name": "absorption and phase contrast nanotomography",
        "instrument_id": "12.234.34567/765b3dc3-f658-410e-b371-04dd1adcd520",
        "sample_id": "bd31725a-dbfd-4c32-87db-1c1ebe61e5ca",
        "proposal_id": "616254",
        "owner_group": "ess_proposal_616254",
        "access_groups": [
            "scientific information management systems group",
            "scicat group",
        ],
    }

    # Compare parsed documents so that whitespace and key order are ignored.
    rendered_document = json.loads(build_dataset_schema(**render_arguments))
    expected_document = json.loads(_example_dataset_schema)
    assert rendered_document == expected_document

0 comments on commit 448848f

Please sign in to comment.