GloFAS new version (#205)

* Add model version ption * Take into account the reforecast years * Include version in filename * added month limits * Refined the two model versions * Note in changelog * Fix forecast loading bug * Add fix to changelog * Fix v4 coordinates * Fixed new chirps bug
OCHA-DAP · Aug 24, 2023 · c9e49cb · c9e49cb
1 parent 40b3001
commit c9e49cb
Show file tree

Hide file tree

Showing 11 changed files with 307 additions and 129 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -8,6 +8,21 @@ Changelog <https://keepachangelog.com/en/1.0.0/>`__, and this project
 adheres to `Semantic
 Versioning <https://semver.org/spec/v2.0.0.html>`__.
 
+[1.1.3] - 2023-08-15
+--------------------
+
+Added
+~~~~~
+
+- Support for GloFAS version 4.0, note that this will break any GloFAS
+  filepaths downloaded with previous versions as the model version is now
+  added to the filename
+
+Fixed
+~~~~~
+
+- GloFAS forecast loading was broken due to missing time dimension
+
 [1.1.2] - 2023-07-04
 --------------------
 

diff --git a/docs/datasources/glofas.rst b/docs/datasources/glofas.rst
@@ -39,14 +39,17 @@ reporting points (those found on the map viewer)
 still need to be extracted. This GloFAS Python module takes
 care of all of these steps.
 
-A note about model versions: On 26 May 2021 GloFAS released `version 3
+Both the latest version (version 4, released summer 2023) and the
+previous version (version 3,
+`released May 2021
 <https://www.copernicus.eu/en/news/news/observer-whats-new-latest-glofas-31-release>`_
-of their model. While version 2 is still available on CDS, it
-will soon be phased out. Thus this module is currently only able
-to download version 3.
+)
+are supported. We do not support version 2 at this time.
 
 Before describing the module usage, we will outline the different
 GloFAS datasets available: reanalysis, forecast, and reforecast.
+All datasets have a spatial resolution of 0.05° in version 4,
+and 0.1° for version 3.
 
 Reanalysis
 ~~~~~~~~~~
@@ -55,7 +58,7 @@ GloFAS-ERA5 reanalysis
 `(Harrigan et al.)
 <https://essd.copernicus.org/articles/12/2043/2020/>`_
 is a global gridded dataset of river discharge with
-a daily timestep and resolution of 0.1°,
+a daily timestep,
 available from 1 January 1979 to the present day (updated daily).
 It is based on ECMWF's latest global atmospheric reanalysis
 `(ERA5, Herbasch et al.)
@@ -89,7 +92,7 @@ Twice per week (Mondays and Thursdays), the IFS is extended to run up to 46 days
 at a coarser resolution (~36 km). GloFAS makes available days
 16 to 30 for this extended forecast.
 
-Version 3 of the forecast data is available to download from 26 May 2021 until
+Both versions 3 and 4 of the forecast data are available to download from 26 May 2021 until
 present day (updated daily) `here
 <https://cds.climate.copernicus.eu/cdsapp#!/dataset/cems-glofas-forecast?tab=overview>`_.
 While CDS does have version 3 pre-release data from 2020-2021,
@@ -108,8 +111,10 @@ is similar to the forecast, but uses the ECMWF IFS
 <https://www.ecmwf.int/en/forecasts/documentation-and-support/extended-range/re-forecast-medium-and-extended-forecast-range>`_.
 The reforecast is initialized twice per week (Monday and Thursdays)
 and has 11 ensemble members.
-The data runs from January 1999 to December 2018,
-and is available to download
+The data runs from March 2003 to August 2022, and is only available
+for the months March to August.
+(For version 3 it runs from January 1999 to December 2018, for all months).
+The data can be downloaded
 `from CDS here
 <https://cds.climate.copernicus.eu/cdsapp#!/dataset/cems-glofas-reforecast?tab=overview>`_.
 

diff --git a/src/ochanticipy/datasources/chirps/chirps.py b/src/ochanticipy/datasources/chirps/chirps.py
@@ -427,7 +427,7 @@ def _get_file_name(
         file_name = (
             f"{file_name_base}"
             f"r{self._resolution}"
-            f"_{self._geobb.get_filename_repr(p=0)}.nc"
+            f"_{self._geobb.get_filename_repr(precision=0)}.nc"
         )
         return file_name
 
@@ -562,7 +562,7 @@ def _get_file_name(
             f"{file_name_base}"
             f"{day}_"
             f"r{self._resolution}"
-            f"_{self._geobb.get_filename_repr(p=0)}.nc"
+            f"_{self._geobb.get_filename_repr(precision=0)}.nc"
         )
         return file_name
 

diff --git a/src/ochanticipy/datasources/glofas/forecast.py b/src/ochanticipy/datasources/glofas/forecast.py
@@ -2,7 +2,7 @@
 import logging
 from datetime import date
 from pathlib import Path
-from typing import List, Union
+from typing import Union
 
 import xarray as xr
 from dateutil import rrule
@@ -15,81 +15,7 @@
 logger = logging.getLogger(__name__)
 
 
-class _GlofasForecastBase(glofas.Glofas):
-    """Base class for all GloFAS forecast data downloading and processing."""
-
-    def __init__(
-        self,
-        country_config: CountryConfig,
-        geo_bounding_box: GeoBoundingBox,
-        cds_name: str,
-        system_version: str,
-        product_type: Union[str, List[str]],
-        date_variable_prefix: str,
-        frequency: int,
-        coord_names: List[str],
-        leadtime_max: int,
-        start_date_min: date,
-        end_date_max: date = None,
-        start_date: Union[date, str] = None,
-        end_date: Union[date, str] = None,
-    ):
-        super().__init__(
-            country_config=country_config,
-            geo_bounding_box=geo_bounding_box,
-            cds_name=cds_name,
-            system_version=system_version,
-            product_type=product_type,
-            date_variable_prefix=date_variable_prefix,
-            frequency=frequency,
-            coord_names=coord_names,
-            start_date_min=start_date_min,
-            end_date_max=end_date_max,
-            start_date=start_date,
-            end_date=end_date,
-            leadtime_max=leadtime_max,
-        )
-
-    @check_file_existence
-    def _load_single_file(
-        self, input_filepath: Path, filepath: Path, clobber: bool
-    ) -> xr.Dataset:
-        return _read_in_ensemble_and_perturbed_datasets(
-            filepath=input_filepath
-        )
-
-
-def _read_in_ensemble_and_perturbed_datasets(filepath: Path):
-    """Read in forecast and reforecast data.
-
-    The GloFAS forecast and reforecast data GRIB files contain two
-    separate datasets: the control member, generated from the most accurate
-    estimate of current conditions, and the perturbed forecast, which
-    contains N ensemble members created by perturbing the control forecast.
-
-    This function reads in both datasets and creates an N+1 (perturbed
-    + control) ensemble.
-    See `this paper <https://hess.copernicus.org/preprints/hess-2020-532/>`__
-    for more details.
-    """
-    ds_list = []
-    for data_type in ["cf", "pf"]:
-        ds = xr.load_dataset(
-            filepath,
-            engine="cfgrib",
-            backend_kwargs={
-                "indexpath": "",
-                "filter_by_keys": {"dataType": data_type},
-            },
-        )
-        # Extra processing require for control forecast
-        if data_type == "cf":
-            ds = ds.expand_dims(dim="number")
-        ds_list.append(ds)
-    return xr.combine_by_coords(ds_list, combine_attrs="drop_conflicts")
-
-
-class GlofasForecast(_GlofasForecastBase):
+class GlofasForecast(glofas.Glofas):
     """
     Class for downloading and processing GloFAS forecast data.
 
@@ -126,6 +52,10 @@ class GlofasForecast(_GlofasForecastBase):
     end_date : Union[date, str], default: date.today()
         The ending date for the dataset. If left blank, defaults to
         the current date
+    model_version : int, default: 4
+        The version of the GloFAS model to use, can only be 3 or 4.
+        If in doubt, always use the latest (default).
+
     Examples
     --------
     Download, process and load GloFAS forecast data for the past month,
@@ -161,28 +91,40 @@ def __init__(
         leadtime_max: int,
         start_date: Union[date, str] = None,
         end_date: Union[date, str] = None,
+        model_version: int = glofas.DEFAULT_MODEL_VERSION,
     ):
         super().__init__(
             country_config=country_config,
             geo_bounding_box=geo_bounding_box,
             start_date=start_date,
             end_date=end_date,
             cds_name="cems-glofas-forecast",
-            system_version="operational",
+            model_version=model_version,
             product_type=["control_forecast", "ensemble_perturbed_forecasts"],
             date_variable_prefix="",
             frequency=rrule.DAILY,
-            coord_names=["number", "step"],
+            coord_names=["time", "number", "step"],
             leadtime_max=leadtime_max,
             start_date_min=date(year=2021, month=5, day=26),
         )
 
     @staticmethod
-    def _preprocess_load(ds: xr.Dataset) -> xr.Dataset:
-        return ds.expand_dims("time")
+    def _system_version_dict() -> glofas.SystemVersions:
+        system_versions = glofas.SystemVersions()
+        system_versions[3] = "version_3_1"
+        system_versions[4] = "operational"
+        return system_versions
+
+    @check_file_existence
+    def _load_single_file(
+        self, input_filepath: Path, filepath: Path, clobber: bool
+    ) -> xr.Dataset:
+        return _read_in_ensemble_and_perturbed_datasets(
+            filepath=input_filepath
+        ).expand_dims("time")
 
 
-class GlofasReforecast(_GlofasForecastBase):
+class GlofasReforecast(glofas.Glofas):
     """
     Class for downloading and processing GloFAS reforecast data.
 
@@ -215,6 +157,9 @@ class GlofasReforecast(_GlofasForecastBase):
     end_date : Union[date, str], default: date(year=2018, month=12, day=31)
         The ending date for the dataset. If left blank, defaults to the
         last available date
+    model_version : int, default: 4
+        The version of the GloFAS model to use, can only be 3 or 4.
+        If in doubt, always use the latest (default).
 
     Examples
     --------
@@ -248,14 +193,29 @@ def __init__(
         leadtime_max: int,
         start_date: Union[date, str] = None,
         end_date: Union[date, str] = None,
+        model_version: int = glofas.DEFAULT_MODEL_VERSION,
     ):
+        # Unfortunately start date min and max, as well as months,
+        # depend on the model version. If this were more complicated
+        # then it may make sense to use a factory, but that would change
+        # how the constructor is called so trying to avoid it for now.
+        if model_version == 3:
+            start_date_min = date(year=1999, month=1, day=1)
+            end_date_max = date(year=2018, month=12, day=31)
+            month_list = None
+        elif model_version == 4:
+            start_date_min = date(year=2003, month=3, day=1)
+            end_date_max = date(year=2022, month=8, day=31)
+            month_list = list(range(3, 9))  # March to August
+        else:
+            raise ValueError("Model version must be 3 or 4")
         super().__init__(
             country_config=country_config,
             geo_bounding_box=geo_bounding_box,
             start_date=start_date,
             end_date=end_date,
+            model_version=model_version,
             cds_name="cems-glofas-reforecast",
-            system_version="version_3_1",
             product_type=[
                 "control_reforecast",
                 "ensemble_perturbed_reforecasts",
@@ -264,6 +224,52 @@ def __init__(
             frequency=rrule.MONTHLY,
             coord_names=["number", "time", "step"],
             leadtime_max=leadtime_max,
-            start_date_min=date(year=1999, month=1, day=1),
-            end_date_max=date(year=2018, month=12, day=31),
+            start_date_min=start_date_min,
+            end_date_max=end_date_max,
+            month_list=month_list,
+        )
+
+    @staticmethod
+    def _system_version_dict() -> glofas.SystemVersions:
+        system_versions = glofas.SystemVersions()
+        system_versions[3] = "version_3_1"
+        system_versions[4] = "version_4_0"
+        return system_versions
+
+    @check_file_existence
+    def _load_single_file(
+        self, input_filepath: Path, filepath: Path, clobber: bool
+    ) -> xr.Dataset:
+        return _read_in_ensemble_and_perturbed_datasets(
+            filepath=input_filepath
+        )
+
+
+def _read_in_ensemble_and_perturbed_datasets(filepath: Path):
+    """Read in forecast and reforecast data.
+
+    The GloFAS forecast and reforecast data GRIB files contain two
+    separate datasets: the control member, generated from the most accurate
+    estimate of current conditions, and the perturbed forecast, which
+    contains N ensemble members created by perturbing the control forecast.
+
+    This function reads in both datasets and creates an N+1 (perturbed
+    + control) ensemble.
+    See `this paper <https://hess.copernicus.org/preprints/hess-2020-532/>`__
+    for more details.
+    """
+    ds_list = []
+    for data_type in ["cf", "pf"]:
+        ds = xr.load_dataset(
+            filepath,
+            engine="cfgrib",
+            backend_kwargs={
+                "indexpath": "",
+                "filter_by_keys": {"dataType": data_type},
+            },
         )
+        # Extra processing require for control forecast
+        if data_type == "cf":
+            ds = ds.expand_dims(dim="number")
+        ds_list.append(ds)
+    return xr.combine_by_coords(ds_list, combine_attrs="drop_conflicts")