Skip to content

Commit

Permalink
Merge pull request #84 from SciCatProject/fix_for_testing
Browse files Browse the repository at this point in the history
Fix for testing
  • Loading branch information
YooSunYoung authored Nov 1, 2024
2 parents f7361e5 + c933985 commit 344362e
Show file tree
Hide file tree
Showing 11 changed files with 83 additions and 21 deletions.
1 change: 1 addition & 0 deletions resources/base.imsc.json.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"order": 1,
"id": "c5bed39a-4379-11ef-ba5a-ffbc783163b6",
"name" : "Generic metadata schema",
"instrument" : "",
Expand Down
1 change: 1 addition & 0 deletions resources/coda.imsc.json.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"order": 1,
"id" : "715ce7ba-3f91-11ef-932f-37a5c6fd60b1",
"name" : "Coda Metadata Schema",
"instrument": "coda",
Expand Down
3 changes: 2 additions & 1 deletion resources/config.sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
"hash_file_extension": "b2b",
"ingestor_files_directory": "../ingestor",
"message_to_file": true,
"message_file_extension": "message.json"
"message_file_extension": "message.json",
"use_full_file_path": false
}
},
"kafka": {
Expand Down
1 change: 1 addition & 0 deletions resources/dream.imsc.json.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"order": 1,
"id" : "72a991ee-437a-11ef-8fd2-1f95660accb7",
"name" : "dream Metadata Schema",
"instrument": "dream",
Expand Down
1 change: 1 addition & 0 deletions resources/loki.imsc.json.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"order": 1,
"id" : "891322f6-437a-11ef-980a-7bdc756bd0b3",
"name" : "Loki Metadata Schema",
"instrument": "loki",
Expand Down
13 changes: 6 additions & 7 deletions src/scicat_communication.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
import json
import logging
from dataclasses import asdict
from typing import Any
from urllib.parse import quote, urljoin

Expand Down Expand Up @@ -107,15 +108,13 @@ def create_scicat_origdatablock(
return result


def render_full_url(url: str, config: SciCatOptions) -> str:
    """Expand a URL that starts with a configured endpoint name.

    Parameters
    ----------
    url:
        Either an absolute URL (``http://`` or ``https://``) or a string
        that starts with one of the field names of ``config.urls``.
    config:
        Options object whose ``urls`` attribute is a dataclass instance;
        its field names are the endpoint names and its field values are
        the text substituted for them.

    Returns
    -------
    :
        ``url`` unchanged when it is already absolute or does not start
        with any endpoint name; otherwise ``url`` with the leading
        endpoint name replaced by the configured value.
    """
    if url.startswith(("http://", "https://")):
        return url

    for endpoint, endpoint_url in asdict(config.urls).items():
        if url.startswith(endpoint):
            # Replace only the leading endpoint name. The same text may
            # appear again later in the URL (e.g. in a query string) and
            # must be left untouched.
            return endpoint_url + url.removeprefix(endpoint)

    return url


Expand Down
1 change: 1 addition & 0 deletions src/scicat_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ class FileHandlingOptions:
ingestor_files_directory: str = "../ingestor"
message_to_file: bool = True
message_file_extension: str = "message.json"
use_full_file_path: bool = False


@dataclass(kw_only=True)
Expand Down
43 changes: 36 additions & 7 deletions src/scicat_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@ def to_date(value: Any) -> str | None:


def to_dict(value: Any) -> dict:
    """Convert ``value`` to a dictionary.

    A string is parsed as a Python literal with ``ast.literal_eval`` and
    must evaluate to a dictionary. Any other input is passed to ``dict``.

    Raises
    ------
    ValueError
        If ``value`` is a string whose literal is not a dictionary.
        Errors raised by ``ast.literal_eval`` for malformed strings
        are propagated unchanged.
    """
    if not isinstance(value, str):
        return dict(value)

    result = ast.literal_eval(value)
    if not isinstance(result, dict):
        # Build a single message string: passing the value as a second
        # positional argument would make the error render as a tuple.
        raise ValueError(
            f"Invalid value. Must be able to convert to a dictionary. Got {value!r}"
        )
    return result


Expand Down Expand Up @@ -93,9 +102,20 @@ def convert_to_type(input_value: Any, dtype_desc: str) -> Any:
"join_with_space": lambda value: ", ".join(
ast.literal_eval(value) if isinstance(value, str) else value
),
"evaluate": lambda value: ast.literal_eval(value),
# "evaluate": lambda value: ast.literal_eval(value),
# The ``evaluate`` operator is intentionally not registered here,
# because evaluating arbitrary input should be avoided if possible.
# It might seem easy to use, but it breaks very easily
# when the input is not as expected.
# It is better to use the type-specific converters instead.
# However, if it is the only way to go, you can add it here.
# In that case, please add a comment explaining why it is needed.
"filename": lambda value: os.path.basename(value),
"dirname": lambda value: os.path.dirname(value),
"dirname-2": lambda value: os.path.dirname(os.path.dirname(value)),
"getitem": lambda value, key: value[
key
], # The only operator that takes an argument
}
)

Expand Down Expand Up @@ -129,6 +149,7 @@ def extract_variables_values(
config: OfflineIngestorConfig,
) -> dict:
variable_map = {
"ingestor_run_id": str(uuid.uuid4()),
"filepath": pathlib.Path(config.nexus_file),
"now": datetime.datetime.now(tz=datetime.UTC).isoformat(),
}
Expand All @@ -137,12 +158,13 @@ def extract_variables_values(
if isinstance(variable_recipe, NexusFileMetadataVariable):
value = _retrieve_values_from_file(variable_recipe, h5file)
elif isinstance(variable_recipe, ScicatMetadataVariable):
full_endpoint_url = render_full_url(
render_variable_value(variable_recipe.url, variable_map),
config.scicat,
)
value = retrieve_value_from_scicat(
config=config.scicat,
scicat_endpoint_url=render_full_url(
render_variable_value(variable_recipe.url, variable_map),
config.scicat,
),
scicat_endpoint_url=full_endpoint_url,
field_name=variable_recipe.field,
)
elif isinstance(variable_recipe, ValueMetadataVariable):
Expand All @@ -152,7 +174,12 @@ def extract_variables_values(
if isinstance(value, str)
else value
)
value = _get_operator(variable_recipe.operator)(value)
_operator = _get_operator(variable_recipe.operator)
if variable_recipe.field:
value = _operator(value, variable_recipe.field)
else:
value = _operator(value)

else:
raise Exception("Invalid variable source: ", source)
variable_map[variable_name] = convert_to_type(value, variable_recipe.value_type)
Expand All @@ -165,7 +192,7 @@ def extract_paths_from_h5_file(
_path: list[str],
) -> list[str]:
master_key = _path.pop(0)
output_paths = [master_key]
output_paths = []
if "*" in master_key:
temp_keys = [k2 for k2 in _h5_object.keys() if re.search(master_key, k2)]
for key in temp_keys:
Expand Down Expand Up @@ -216,6 +243,8 @@ class ScicatDataset:
proposalId: str | None = None
ownerGroup: str | None = None
accessGroups: list[str] | None = None
startTime: str | None = None
endTime: str | None = None


@dataclass(kw_only=True)
Expand Down
15 changes: 13 additions & 2 deletions src/scicat_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class ValueMetadataVariable(MetadataSchemaVariable):

operator: str = ""
value: str
field: str | None = None
# Only one field (argument) is allowed for now.


@dataclass(kw_only=True)
Expand Down Expand Up @@ -110,6 +112,7 @@ class MetadataSchema:
name: str
instrument: str
selector: str | dict
order: int
variables: dict[str, MetadataSchemaVariable]
schema: dict[str, MetadataItem]

Expand Down Expand Up @@ -138,6 +141,12 @@ def from_file(cls, schema_file_name: pathlib.Path) -> "MetadataSchema":


def render_variable_value(var_value: str, variable_registry: dict) -> str:
# If it is only one variable, then it is a simple replacement
if (var_key := var_value.removesuffix(">").removeprefix("<")) in variable_registry:
return variable_registry[var_key]

# If it is a complex variable, then it is a combination of variables
# similar to f-string in python
for reg_var_name, reg_var_value in variable_registry.items():
var_value = var_value.replace("<" + reg_var_name + ">", str(reg_var_value))

Expand All @@ -158,11 +167,13 @@ def collect_schemas(dir_path: pathlib.Path) -> OrderedDict[str, MetadataSchema]:
MetadataSchema.from_file(schema_file_path)
for schema_file_path in list_schema_file_names(dir_path)
],
key=lambda schema: schema.name,
key=lambda schema: (schema.order, schema.name.capitalize()),
# The name is capitalized so that schemas with the same order
# are sorted alphabetically in a case-insensitive way.
)
schemas = OrderedDict()
for metadata_schema in metadata_schemas:
schemas[metadata_schema.name] = metadata_schema
schemas[metadata_schema.id] = metadata_schema
return schemas


Expand Down
7 changes: 6 additions & 1 deletion src/scicat_offline_ingestor.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,16 @@ def main() -> None:
)

# Collect data-file descriptions
if not config.ingestion.file_handling.use_full_file_path:
source_folder = variable_map["source_folder"]
else:
source_folder = None

data_file_list = create_data_file_list(
nexus_file=nexus_file_path,
ingestor_directory=ingestor_directory,
config=fh_options,
source_folder=variable_map["source_folder"],
source_folder=source_folder,
logger=logger,
# TODO: add done_writing_message_file and nexus_structure_file
)
Expand Down
18 changes: 15 additions & 3 deletions tests/test_scicat_metadata_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,25 @@ def test_collect_metadata_schema() -> None:
assert len(schemas) == len(ALL_SCHEMA_EXAMPLES)
for schema_name, schema in schemas.items():
assert isinstance(schema, MetadataSchema)
assert schema_name == schema.name
assert schema_name == schema.id

assert isinstance(schemas, OrderedDict)
# Check if the schema is ordered by the schema name
assert list(schemas.keys()) == sorted(schemas.keys())
# Check if the schema is ordered by the schema order and name.
# The expected keys are hardcoded on purpose.
# Always hardcode the expected keys to avoid the test being too flexible.
assert list(schemas.keys()) == [
"715ce7ba-3f91-11ef-932f-37a5c6fd60b1", # Coda, 1, Coda Metadata Schema
"72a991ee-437a-11ef-8fd2-1f95660accb7", # Dream, 1, dream Metadata Schema
"c5bed39a-4379-11ef-ba5a-ffbc783163b6", # Base, 1, Generic metadata schema
"891322f6-437a-11ef-980a-7bdc756bd0b3", # Loki, 1, Loki Metadata Schema
]


def test_metadata_schema_selection() -> None:
schemas = OrderedDict(
{
"schema1": MetadataSchema(
order=1,
id="schema1",
name="Schema 1",
instrument="",
Expand All @@ -67,6 +75,7 @@ def test_metadata_schema_selection() -> None:
schema={},
),
"schema2": MetadataSchema(
order=2,
id="schema2",
name="Schema 2",
instrument="",
Expand All @@ -75,6 +84,7 @@ def test_metadata_schema_selection() -> None:
schema={},
),
"schema3": MetadataSchema(
order=3,
id="schema3",
name="Schema 3",
instrument="",
Expand All @@ -96,6 +106,7 @@ def test_metadata_schema_selection_wrong_selector_target_name_raises() -> None:
OrderedDict(
{
"schema1": MetadataSchema(
order=1,
id="schema1",
name="Schema 1",
instrument="",
Expand All @@ -115,6 +126,7 @@ def test_metadata_schema_selection_wrong_selector_function_name_raises() -> None
OrderedDict(
{
"schema1": MetadataSchema(
order=1,
id="schema1",
name="Schema 1",
instrument="",
Expand Down

0 comments on commit 344362e

Please sign in to comment.