Skip to content

Commit

Permalink
start on extended metadata (ACDD) validators
Browse files Browse the repository at this point in the history
Closes #40
  • Loading branch information
DocOtak committed Nov 21, 2024
1 parent b45fc41 commit a0c85f8
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 0 deletions.
48 changes: 48 additions & 0 deletions cchdo/hydro/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from collections import defaultdict
from itertools import groupby, islice
from logging import getLogger

import xarray as xr

# Module-level logger, named after this module via __name__.
log = getLogger(__name__)


def all_equal(iterable, key=None):
    """Return True when every element of *iterable* equals the others.

    Elements are compared through *key* when one is given.  An empty
    iterable and a single-element iterable both count as all-equal.
    """
    # groupby folds consecutive equal items into runs, so the presence of a
    # second run means at least two distinct values were seen.
    # see https://docs.python.org/3/library/itertools.html#itertools-recipes
    runs = groupby(iterable, key)
    next(runs, None)  # step past the first run, if there is one
    return next(runs, None) is None


# Attribute keys whose values validate() requires to be equal across all
# variables that share the same "project" attribute.
SAME_KEYS = ("processing_level", "comment", "creator_name")

# Attribute keys listed as allowed to differ; not referenced by the code
# visible in this module yet.
ALLOWED_DIFFER = ("date_modified", "date_metadata_modified")


def validate(ds: xr.Dataset):
    """Check that variables sharing a ``project`` attribute agree on metadata.

    Every entry of ``ds.variables`` that carries a ``project`` attribute is
    grouped by that attribute's value.  Within each group, each attribute
    named in ``SAME_KEYS`` must compare equal across all member variables;
    an attribute that a variable lacks is read as ``None``.

    Raises:
        ExceptionGroup: containing one ``ValueError`` for every
            (project, key) pair whose values are not all equal.  Nothing is
            raised, and ``None`` is returned, when every group agrees.
    """
    # cannot use filter_by_attrs since we want to filter on the coordinates too
    members_by_project = defaultdict(list)
    for var_name, variable in ds.variables.items():
        project_name = variable.attrs.get("project")
        if project_name is None:
            continue
        members_by_project[project_name].append(var_name)

    # Collect every mismatch before raising so the caller sees all of them.
    failures = []
    for project, members in members_by_project.items():
        log.debug(f"Checking project '{project}' which includes {members}")
        for key in SAME_KEYS:
            values = {member: ds[member].attrs.get(key) for member in members}
            if all_equal(values.values()):
                continue
            failure = ValueError(
                f"Project '{project}' key '{key}' is not the same: {values}"
            )
            log.debug(failure)
            failures.append(failure)

    if not failures:
        return
    raise ExceptionGroup("Metadata has failed to validate", failures)
24 changes: 24 additions & 0 deletions cchdo/hydro/tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from io import BytesIO

import pytest

from cchdo.hydro import read_csv
from cchdo.hydro.metadata import validate


def test_invalid_createor_name():
    """validate() must reject a project whose creator_name values differ.

    Two variables are tagged with the same project, but only one of them is
    given a ``creator_name``; the other therefore reads as ``None``.
    """
    csv_bytes = b"""EXPOCODE,STNNBR,CASTNO,SAMPNO,LATITUDE,LONGITUDE,DATE,TIME,CTDPRS [DBAR],SILCAT [UMOL/KG],SILCAT [UMOL/KG]_FLAG_W,PHSPHT [UMOL/KG],PHSPHT [UMOL/KG]_FLAG_W
TEST,1,1,1,0,0,20220101,0000,0,0,2,0,3"""
    dataset = read_csv(BytesIO(csv_bytes))

    # Put both nutrient variables into the same project group.
    for var_name in ("silicate", "phosphate"):
        dataset[var_name].attrs["project"] = "nutrients"

    # Only one member gets a creator_name, so the group disagrees on it.
    dataset["silicate"].attrs["creator_name"] = "Susan Becker"

    with pytest.raises(ExceptionGroup) as excinfo:
        validate(dataset)

    assert excinfo.group_contains(ValueError)

0 comments on commit a0c85f8

Please sign in to comment.