Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scicat object schema rendering using jinja template. #49

Merged
merged 4 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include src/scicat_schemas/dataset.schema.json.jinja
include src/scicat_schemas/origdatablock.schema.json.jinja
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dependencies = [
"ess-streaming-data-types",
"graypy",
"h5py",
"jinja2",
"kafka-python",
"requests",
"rich"
Expand Down
47 changes: 0 additions & 47 deletions scicat_schemas/dataset.schema.json

This file was deleted.

49 changes: 49 additions & 0 deletions src/scicat_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
from jinja2 import Template
from scicat_path_helpers import get_dataset_schema_template_path


def build_dataset_schema(
    *,
    nxs_dataset_pid: str,
    dataset_name: str,
    dataset_description: str,
    principal_investigator: str,
    facility: str,
    environment: str,
    scientific_metadata: str,
    owner: str,
    owner_email: str,
    source_folder: str,
    contact_email: str,
    iso_creation_time: str,
    technique_pid: str,
    technique_name: str,
    instrument_id: str,
    sample_id: str,
    proposal_id: str,
    owner_group: str,
    access_groups: list[str],
) -> str:
    """Render the dataset schema template and return the resulting text.

    The template is loaded from the path returned by
    ``get_dataset_schema_template_path`` and rendered with every argument
    forwarded under its own name. All arguments are keyword-only.

    ``scientific_metadata`` is placed by the template between the braces of
    the ``scientificMetadata`` object, so it is expected to be a
    pre-formatted, comma-separated run of JSON members without the
    enclosing braces. ``facility`` and ``environment`` are joined by the
    template into a single ``creationLocation`` value, and each entry of
    ``access_groups`` becomes one element of the ``accessGroups`` array.

    Returns:
        The rendered template as a string.
    """
    # Decode explicitly as UTF-8 so that the result does not depend on the
    # locale's preferred encoding of the machine running the ingestor.
    template_text = get_dataset_schema_template_path().read_text(encoding="utf-8")
    return Template(template_text).render(
        nxs_dataset_pid=nxs_dataset_pid,
        dataset_name=dataset_name,
        dataset_description=dataset_description,
        principal_investigator=principal_investigator,
        facility=facility,
        environment=environment,
        scientific_metadata=scientific_metadata,
        owner=owner,
        owner_email=owner_email,
        source_folder=source_folder,
        contact_email=contact_email,
        iso_creation_time=iso_creation_time,
        technique_pid=technique_pid,
        technique_name=technique_name,
        instrument_id=instrument_id,
        sample_id=sample_id,
        proposal_id=proposal_id,
        owner_group=owner_group,
        access_groups=access_groups,
    )
14 changes: 14 additions & 0 deletions src/scicat_path_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,17 @@ def select_target_directory(
return file_path.parent / pathlib.Path(fh_options.ingestor_files_directory)
else:
return pathlib.Path(fh_options.local_output_directory)


def get_dataset_schema_template_path() -> pathlib.Path:
    """Return the location of the dataset schema Jinja template.

    The path is built relative to the directory that contains this module:
    ``<module directory>/scicat_schemas/dataset.schema.json.jinja``.
    The function does not check that the file exists.
    """
    module_directory = pathlib.Path(__file__).parent
    return module_directory / "scicat_schemas" / "dataset.schema.json.jinja"


def get_origdatablock_schema_template_path() -> pathlib.Path:
    """Get the path to the origdatablock schema template.

    The path is built relative to the directory that contains this module:
    ``<module directory>/scicat_schemas/origdatablock.schema.json.jinja``.
    The function does not check that the file exists.
    """
    return pathlib.Path(__file__).parent / pathlib.Path(
        "scicat_schemas/origdatablock.schema.json.jinja"
    )
Empty file added src/scicat_schemas/__init__.py
Empty file.
30 changes: 30 additions & 0 deletions src/scicat_schemas/dataset.schema.json.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{#-
  Dataset schema template.

  Every plain value is rendered through the `tojson` filter, which emits the
  surrounding quotes itself and escapes characters such as `"`, `\` and
  newlines. Interpolating the raw value between hand-written quotes would
  produce invalid JSON as soon as a value contained one of those characters.

  `scientific_metadata` is the exception: it is inserted verbatim because it
  is expected to already be a comma-separated run of JSON members (without
  the enclosing braces).
-#}
{
  "pid": {{ nxs_dataset_pid | tojson }},
  "datasetName": {{ dataset_name | tojson }},
  "description": {{ dataset_description | tojson }},
  "principalInvestigator": {{ principal_investigator | tojson }},
  "creationLocation": {{ (facility ~ ":" ~ environment) | tojson }},
  "scientificMetadata": {
    {{ scientific_metadata }}
  },
  "owner": {{ owner | tojson }},
  "ownerEmail": {{ owner_email | tojson }},
  "sourceFolder": {{ source_folder | tojson }},
  "contactEmail": {{ contact_email | tojson }},
  "creationTime": {{ iso_creation_time | tojson }},
  "type": "raw",
  "techniques": [
    {
      "pid": {{ technique_pid | tojson }},
      "names": {{ technique_name | tojson }}
    }
  ],
  "instrumentId": {{ instrument_id | tojson }},
  "sampleId": {{ sample_id | tojson }},
  "proposalId": {{ proposal_id | tojson }},
  "ownerGroup": {{ owner_group | tojson }},
  "accessGroups": [
    {% for access_group in access_groups %}{{ access_group | tojson }}{% if not loop.last %},
    {% endif %}{% endfor %}
  ]
}
18 changes: 18 additions & 0 deletions tests/test_scicat_path_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)


def test_dataset_schema_path_helper() -> None:
    """The dataset template helper points at an existing, correctly named file."""
    from scicat_path_helpers import get_dataset_schema_template_path

    template_path = get_dataset_schema_template_path()
    assert template_path.name == "dataset.schema.json.jinja"
    assert template_path.exists()


def test_origdatablock_schema_path_helper() -> None:
    """The origdatablock template helper points at an existing, correctly named file."""
    from scicat_path_helpers import get_origdatablock_schema_template_path

    template_path = get_origdatablock_schema_template_path()
    assert template_path.name == "origdatablock.schema.json.jinja"
    assert template_path.exists()
93 changes: 93 additions & 0 deletions tests/test_scicat_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)


# Example scientific metadata fixture: three JSON object members
# ("run_number", "sample_temperature", "start_time") written WITHOUT the
# enclosing braces. The same text is passed as the ``scientific_metadata``
# argument in the rendering test and spliced into the expected document below.
_example_scientific_metadata = """"run_number": {
"value": 18856,
"unit": "",
"human_name": "Run Number",
"type": "integer"
},
"sample_temperature": {
"value": 20.4,
"unit": "C",
"human_name": "Sample Temperature",
"type": "quantity"
},
"start_time" : {
"value" : "2024-07-16T09:30:12.987Z",
"unit" : "",
"human_name" : "Start Time",
"type" : "date"
}"""

# Expected dataset document for the rendering test. It is assembled from
# three pieces: the head of the JSON object, the scientific metadata fixture
# (placed between the braces of "scientificMetadata"), and the tail. The test
# compares it after ``json.loads``, so whitespace differences are irrelevant.
_example_dataset_schema = (
"""
{
"pid": "12.234.34567/e3690b21-ee8c-40d6-9409-6b6fdca776d2",
"datasetName": "this is a dataset",
"description": "this is the description of the dataset",
"principalInvestigator": "Somebodys Name",
"creationLocation": "ESS:CODA",
"scientificMetadata": {
"""
+ _example_scientific_metadata
+ """
},
"owner": "Somebodys Name",
"ownerEmail": "someones_@_email",
"sourceFolder": "/ess/data/coda/2024/616254",
"contactEmail": "someones_@_email",
"creationTime": "2024-07-16T10:00:00.000Z",
"type": "raw",
"techniques": [
{
"pid": "someprotocol://someones/url/and/id",
"names": "absorption and phase contrast nanotomography"
}
],
"instrumentId": "12.234.34567/765b3dc3-f658-410e-b371-04dd1adcd520",
"sampleId": "bd31725a-dbfd-4c32-87db-1c1ebe61e5ca",
"proposalId": "616254",
"ownerGroup": "ess_proposal_616254",
"accessGroups": [
"scientific information management systems group",
"scicat group"
]
}

"""
)


def test_dataset_schema_rendering() -> None:
    """Rendering the dataset template yields the expected JSON document.

    Both the rendered text and the expected fixture are parsed with
    ``json.loads`` before comparison, so only the JSON content is checked,
    not the exact whitespace of the rendered template.
    """
    import json

    from scicat_dataset import build_dataset_schema

    render_arguments = {
        "nxs_dataset_pid": "12.234.34567/e3690b21-ee8c-40d6-9409-6b6fdca776d2",
        "dataset_name": "this is a dataset",
        "dataset_description": "this is the description of the dataset",
        "principal_investigator": "Somebodys Name",
        "facility": "ESS",
        "environment": "CODA",
        "scientific_metadata": _example_scientific_metadata,
        "owner": "Somebodys Name",
        "owner_email": "someones_@_email",
        "source_folder": "/ess/data/coda/2024/616254",
        "contact_email": "someones_@_email",
        "iso_creation_time": "2024-07-16T10:00:00.000Z",
        "technique_pid": "someprotocol://someones/url/and/id",
        "technique_name": "absorption and phase contrast nanotomography",
        "instrument_id": "12.234.34567/765b3dc3-f658-410e-b371-04dd1adcd520",
        "sample_id": "bd31725a-dbfd-4c32-87db-1c1ebe61e5ca",
        "proposal_id": "616254",
        "owner_group": "ess_proposal_616254",
        "access_groups": [
            "scientific information management systems group",
            "scicat group",
        ],
    }

    rendered_document = json.loads(build_dataset_schema(**render_arguments))
    expected_document = json.loads(_example_dataset_schema)
    assert rendered_document == expected_document