diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index d309a9c6..9d4a8beb 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -338,14 +338,14 @@ def _get_measure_variable( def _get_measure(obj: Union[DataArray, Dataset], key: str) -> List[str]: """ - Translate from cell measures ("area" or "volume") to appropriate variable name. + Translate from cell measures to appropriate variable name. This function interprets the ``cell_measures`` attribute on DataArrays. Parameters ---------- obj: DataArray, Dataset DataArray belonging to the coordinate to be checked - key: str, ["area", "volume"] + key: str key to check for. Returns @@ -353,12 +353,6 @@ def _get_measure(obj: Union[DataArray, Dataset], key: str) -> List[str]: List[str], Variable name(s) in parent xarray object that matches axis or coordinate `key` """ - valid_keys = _CELL_MEASURES - if key not in valid_keys: - raise KeyError( - f"cf_xarray did not understand key {key!r}. Expected one of {valid_keys!r}" - ) - if isinstance(obj, DataArray): obj = obj._to_temp_dataset() @@ -438,7 +432,7 @@ def _build_docstring(func): mapper_docstrings = { _get_axis_coord: f"One or more of {(_AXIS_NAMES + _COORD_NAMES)!r}", _get_axis_coord_single: f"One of {(_AXIS_NAMES + _COORD_NAMES)!r}", - _get_measure_variable: f"One of {_CELL_MEASURES!r}", + # _get_measure_variable: f"One of {_CELL_MEASURES!r}", } sig = inspect.signature(func) @@ -653,6 +647,18 @@ class CFAccessor: def __init__(self, da): self._obj = da + self._all_cell_measures = None + + def _get_all_cell_measures(self): + """ + Get all cell measures defined in the object, adding CF pre-defined measures. 
+ """ + + # get all_cell_measures only once + if not self._all_cell_measures: + self._all_cell_measures = set(_CELL_MEASURES + tuple(self.cell_measures)) + + return self._all_cell_measures def _process_signature( self, @@ -833,7 +839,7 @@ def describe(self): text += "\nCell Measures:\n" measures = self.cell_measures - for key in _CELL_MEASURES: + for key in sorted(self._get_all_cell_measures()): text += f"\t{key}: {measures[key] if key in measures else []}\n" text += "\nStandard Names:\n" @@ -868,8 +874,7 @@ def keys(self) -> Set[str]: """ varnames = list(self.axes) + list(self.coordinates) - if not isinstance(self._obj, Dataset): - varnames.extend(list(self.cell_measures)) + varnames.extend(list(self.cell_measures)) varnames.extend(list(self.standard_names)) return set(varnames) @@ -930,15 +935,23 @@ def cell_measures(self) -> Dict[str, List[str]]: Returns ------- Dictionary of valid cell measure names that can be used with __getitem__ or .cf[key]. - Will be ("area", "volume") or a subset thereof. 
""" - measures = { - key: apply_mapper(_get_measure, self._obj, key, error=False) - for key in _CELL_MEASURES - } + obj = self._obj + all_attrs = [da.attrs.get("cell_measures", "") for da in obj.coords.values()] + if isinstance(obj, DataArray): + all_attrs += [obj.attrs.get("cell_measures", "")] + elif isinstance(obj, Dataset): + all_attrs += [ + da.attrs.get("cell_measures", "") for da in obj.data_vars.values() + ] + + measures: Dict[str, List[str]] = dict() + for attr in all_attrs: + for key, value in parse_cell_methods_attr(attr).items(): + measures[key] = measures.setdefault(key, []) + [value] - return {k: sorted(v) for k, v in measures.items() if v} + return {k: sorted(set(v)) for k, v in measures.items() if v} def get_standard_names(self) -> List[str]: @@ -1069,7 +1082,7 @@ def check_results(names, k): check_results(names, k) successful[k] = bool(names) coords.extend(names) - elif k in _CELL_MEASURES: + elif k in self._get_all_cell_measures(): measure = _get_measure(self._obj, k) check_results(measure, k) successful[k] = bool(measure) diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 49b88994..188636be 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -52,20 +52,30 @@ def test_coordinates(): assert actual == expected -def test_cell_measures(): +def test_cell_measures(capsys): ds = airds.copy(deep=True) ds["foo"] = xr.DataArray(ds["cell_area"], attrs=dict(standard_name="foo_std_name")) ds["air"].attrs["cell_measures"] += " foo_measure: foo" - assert "foo_std_name" in ds.cf["air_temperature"].cf + assert ("foo_std_name" in ds.cf["air_temperature"].cf) and ("foo_measure" in ds.cf) ds["air"].attrs["cell_measures"] += " volume: foo" - expected = dict(area=["cell_area"], volume=["foo"]) - actual = ds["air"].cf.cell_measures - assert actual == expected + ds["foo"].attrs["cell_measures"] = ds["air"].attrs["cell_measures"] + expected = dict(area=["cell_area"], foo_measure=["foo"], 
volume=["foo"]) + actual_air = ds["air"].cf.cell_measures + actual_foo = ds.cf["foo_measure"].cf.cell_measures + assert actual_air == actual_foo == expected actual = ds.cf.cell_measures assert actual == expected + ds.cf.describe() + actual = capsys.readouterr().out + expected = ( + "\nCell Measures:\n\tarea: ['cell_area']\n\tfoo_measure: ['foo']\n\tvolume: ['foo']\n" + "\nStandard Names:\n\tair_temperature: ['air']\n\tfoo_std_name: ['foo']\n\n" + ) + assert actual.endswith(expected) + def test_standard_names(): expected = dict( @@ -240,7 +250,10 @@ def test_kwargs_expand_key_to_multiple_keys(): @pytest.mark.parametrize( "obj, expected", [ - (ds, {"latitude", "longitude", "time", "X", "Y", "T", "air_temperature"}), + ( + ds, + {"latitude", "longitude", "time", "X", "Y", "T", "air_temperature", "area"}, + ), (ds.air, {"latitude", "longitude", "time", "X", "Y", "T", "area"}), (ds_no_attrs.air, set()), ], diff --git a/doc/examples/introduction.ipynb b/doc/examples/introduction.ipynb index 66bbb664..64b55cbd 100644 --- a/doc/examples/introduction.ipynb +++ b/doc/examples/introduction.ipynb @@ -559,10 +559,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Note:** Although it is possible to assign additional coordinates and cell\n", - "measures, `.cf.coordinates` and `.cf.cell_measures` only return a subset of\n", - "`(\"longitude\", \"latitude\", \"vertical\", \"time\")` and `(\"area\", \"volume\")`,\n", - "respectively.\n" + "**Note:** Although it is possible to assign additional coordinates,\n", + "`.cf.coordinates` only returns a subset of\n", + "`(\"longitude\", \"latitude\", \"vertical\", \"time\")`.\n" ] }, { @@ -927,8 +926,8 @@ "source": [ "## Feature: Weight by Cell Measures\n", "\n", - "`cf_xarray` can weight by cell measure variables `\"area\"` and `\"volume\"` if the\n", - "appropriate attribute is set\n" + "`cf_xarray` can weight by cell measure variables if the appropriate attribute is\n", + "set\n" ] }, { diff --git a/doc/whats-new.rst 
b/doc/whats-new.rst index a372316c..a00f5d8d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3,7 +3,7 @@ What's New v0.4.0 (unreleased) =================== - +- Support for indexing by arbitrary cell measures. By `Mattia Almansi`_. v0.3.1 (Nov 25, 2020) =====================