diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 381515d..4414706 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,7 @@ Changed ~~~~~~~ - tox configuration moved to ``setup.cfg`` from ``tox.ini`` +- ``kalendar`` library used for ``dekad`` and ``pentad`` classes Fixed ~~~~~ @@ -22,7 +23,6 @@ Fixed - Automatic raster dimension setting for `lat/lon` and `X/Y` fixed to work for `rioxarray` methods - [1.0.1] - 2023-02-03 -------------------- diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 15339a2..dbb995a 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -272,6 +272,8 @@ jsonschema==4.17.3 # -r requirements/requirements.txt # frictionless # tableschema-to-template +kalendar==0.1.1 + # via -r requirements/requirements.txt libhxl==4.27.3 # via # -r requirements/requirements.txt diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 4d9d3c7..10fc639 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -147,6 +147,8 @@ jsonschema==4.17.3 # via # frictionless # tableschema-to-template +kalendar==0.1.1 + # via ocha-anticipy (setup.cfg) libhxl==4.27.3 # via hdx-python-country locket==1.0.0 diff --git a/setup.cfg b/setup.cfg index ffc61d1..cc2fc3b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,6 +40,7 @@ install_requires = geopandas hdx-python-api>=5.6.4 hdx-python-country + kalendar>=0.1.1 netCDF4 numpy pydantic diff --git a/src/ochanticipy/datasources/usgs/ndvi_base.py b/src/ochanticipy/datasources/usgs/ndvi_base.py index 740e9cc..491a5fa 100644 --- a/src/ochanticipy/datasources/usgs/ndvi_base.py +++ b/src/ochanticipy/datasources/usgs/ndvi_base.py @@ -28,27 +28,22 @@ import pandas as pd import rioxarray # noqa: F401 import xarray as xr +from kalendar import Dekad from rasterio.errors import RasterioIOError import ochanticipy.utils.raster # noqa: F401 from ochanticipy.config.countryconfig import CountryConfig from ochanticipy.datasources.datasource import DataSource -from ochanticipy.utils.dates import ( - compare_dekads_gt, - compare_dekads_lt, - dekad_to_date, - expand_dekads, - get_dekadal_date, -) +from ochanticipy.utils.dates import get_kalendar_date, kalendar_range logger = logging.getLogger(__name__) -_DATE_TYPE = Union[date, str, Tuple[int, int], None] -_EARLIEST_DATE = (2002, 19) +_DATE_TYPE = Union[date, str, Tuple[int, int], Dekad, None] +_EARLIEST_DATE = Dekad(2002, 19) # USGS has reported degradation of USGS NDVI data # from the below date and warnings should be used -_DEGRADATION_DATE = (2022, 13) +_DEGRADATION_DATE = Dekad(2022, 13) class _UsgsNdvi(DataSource): @@ -68,15 +63,16 @@ class _UsgsNdvi(DataSource): Start date. Can be passed as a ``datetime.date`` object or a data string in ISO8601 format, and the relevant dekad will be determined. Or pass - directly as year-dekad tuple, e.g. (2020, 1). - If ``None``, ``start_date`` is set to earliest + directly as year-dekad tuple, e.g. (2020, 1) or + ``kalendar.Dekad``. If ``None``, ``start_date`` is set to earliest date with data: 2002, dekad 19. end_date : _DATE_TYPE, default = None End date. Can be passed as a ``datetime.date`` - object and the relevant dekad will be determined, - as a date string in ISO8601 format, or as a - year-dekad tuple, i.e. (2020, 1). If ``None``, - ``end_date`` is set to ``date.today()``. + object or a data string in ISO8601 format, and + the relevant dekad will be determined. Or pass + directly as year-dekad tuple, e.g. (2020, 1) or + ``kalendar.Dekad``. If ``None``, ``end_date`` is + set to earliest ``date.today``. """ def __init__( @@ -102,15 +98,19 @@ def __init__( self._data_variable_suffix = data_variable_suffix # set dates for data download and processing - self._start_date = get_dekadal_date( - input_date=start_date, default_date=_EARLIEST_DATE + self._start_date = get_kalendar_date( + kalendar_class=Dekad, + input_date=start_date, + default_date=_EARLIEST_DATE, ) - self._end_date = get_dekadal_date( - input_date=end_date, default_date=date.today() + self._end_date = get_kalendar_date( + kalendar_class=Dekad, + input_date=end_date, + default_date=date.today(), ) - if compare_dekads_gt(self._end_date, _DEGRADATION_DATE): + if self._end_date > _DEGRADATION_DATE: logger.warning( "USGS has reported degradation of eMODIS NDVI data " "due to issues with the MODIS sensor's satellite. " @@ -121,11 +121,10 @@ def __init__( ) # warn if dates outside earliest dates - if compare_dekads_lt(self._start_date, _EARLIEST_DATE): + if self._start_date < _EARLIEST_DATE: logger.warning( "Start date is before earliest date data is available. " - f"Data will be downloaded from {_EARLIEST_DATE[0]}, dekad " - f"{_EARLIEST_DATE[1]}." + f"Data will be downloaded from {_EARLIEST_DATE}." ) def download(self, clobber: bool = False) -> Path: @@ -164,11 +163,9 @@ def download(self, clobber: bool = False) -> Path: ... ) >>> bfa_ndvi.download() """ - download_dekads = expand_dekads( - dekad1=self._start_date, dekad2=self._end_date - ) - for year, dekad in download_dekads: - self._download_ndvi_dekad(year=year, dekad=dekad, clobber=clobber) + download_dekads = kalendar_range(x=self._start_date, y=self._end_date) + for dekad in download_dekads: + self._download_ndvi_dekad(dekad=dekad, clobber=clobber) return self._raw_base_dir def process( # type: ignore @@ -280,8 +277,8 @@ def process( # type: ignore ) # get dates for processing - all_dates_to_process = expand_dekads( - dekad1=self._start_date, dekad2=self._end_date + all_dates_to_process = kalendar_range( + x=self._start_date, y=self._end_date ) for stat, is_percentile in zip(process_stats, percentile_identifier): @@ -364,12 +361,10 @@ def load(self, feature_col: str) -> pd.DataFrame: # type: ignore ) from err # filter loaded data frame between our instances dates - load_dates = expand_dekads( - dekad1=self._start_date, dekad2=self._end_date - ) + load_dates = kalendar_range(x=self._start_date, y=self._end_date) loaded_dates = df[["year", "dekad"]].values.tolist() - keep_rows = [tuple(d) in load_dates for d in loaded_dates] + keep_rows = [Dekad(*d) in load_dates for d in loaded_dates] df = df.loc[keep_rows] @@ -380,7 +375,7 @@ def load(self, feature_col: str) -> pd.DataFrame: # type: ignore return df def load_raster( - self, load_date: Union[date, str, Tuple[int, int]] + self, load_date: Union[date, str, Tuple[int, int], Dekad] ) -> xr.DataArray: """Load raster for specific year and dekad. @@ -389,8 +384,9 @@ def load_raster( load_date : Union[date, str, Tuple[int, int]] Date. Can be passed as a ``datetime.date`` object and the relevant dekad will be determined, - as a date string in ISO8601 format, or as a - year-dekad tuple, i.e. (2020, 1). + as a date string in ISO8601 format, as a + year-dekad tuple, i.e. (2020, 1), or a + ``kalendar.Dekad``. Returns ------- @@ -402,9 +398,9 @@ def load_raster( FileNotFoundError If the requested file cannot be found. """ - year, dekad = get_dekadal_date(input_date=load_date) + dekad = get_kalendar_date(kalendar_class=Dekad, input_date=load_date) - filepath = self._get_raw_path(year=year, dekad=dekad, local=True) + filepath = self._get_raw_path(dekad=dekad, local=True) try: da = rioxarray.open_rasterio(filepath) # get time file was updated @@ -414,9 +410,9 @@ def load_raster( da = ( da.assign_coords( { - "year": year, - "dekad": dekad, - "date": dekad_to_date(dekad=(year, dekad)), + "year": dekad.year, + "dekad": dekad.dekad, + "date": dekad.todate(), "modified": file_time, } ) @@ -429,19 +425,14 @@ def load_raster( except RasterioIOError as err: # check if the requested date is outside the instance bounds # don't prevent loading, but use for meaningful error - gt_end = compare_dekads_gt( - dekad1=(year, dekad), dekad2=self._end_date - ) - lt_start = compare_dekads_lt( - dekad1=(year, dekad), dekad2=self._start_date - ) + gt_end = dekad > self._end_date + lt_start = dekad < self._start_date if gt_end or lt_start: file_warning = ( - f"The requested year and dekad, {year}-{dekad}" - f"are {'greater' if gt_end else 'less'} than the " - f"instance {'end' if gt_end else 'start'} year and dekad, " - f"{self._end_date[0] if gt_end else self._start_date[0]}-" - f"{self._end_date[1] if gt_end else self._start_date[1]}. " + f"The requested date, {dekad}" + f"is {'greater' if gt_end else 'less'} than the " + f"instance {'end' if gt_end else 'start'} date, " + f"{self._end_date if gt_end else self._start_date[0]}. " "Calling the `download()` method will not download this " "file, and you need to re-instantiate the class to " "include these dates." @@ -456,24 +447,18 @@ def load_raster( f"Cannot open the .tif file {filepath}. {file_warning}" ) from err - def _download_ndvi_dekad( - self, year: int, dekad: int, clobber: bool - ) -> None: + def _download_ndvi_dekad(self, dekad: Dekad, clobber: bool) -> None: """Download NDVI for specific dekad. Parameters ---------- - year : int - Year - dekad : int - Dekad + dekad : Dekad + Dekadal date clobber : bool If True, overwrites existing file """ - filepath = self._get_raw_path(year=year, dekad=dekad, local=True) - url_filename = self._get_raw_filename( - year=year, dekad=dekad, local=False - ) + filepath = self._get_raw_path(dekad=dekad, local=True) + url_filename = self._get_raw_filename(dekad=dekad, local=False) self._download( filepath=filepath, url_filename=url_filename, clobber=clobber ) @@ -482,15 +467,12 @@ def _download(self, filepath: Path, url_filename: str, clobber: bool): local_filename = filepath.stem url = self._get_url(filename=url_filename) - year, dekad = self._fp_year_dekad(filepath) + dekad = self._fp_dekad(filepath) try: resp = urlopen(url) except HTTPError: - logger.error( - f"No NDVI data available for " - f"dekad {dekad} of {year}, skipping." - ) + logger.error(f"No NDVI data available for {dekad}, skipping.") return if filepath.exists(): @@ -509,10 +491,7 @@ def _download(self, filepath: Path, url_filename: str, clobber: bool): ) return filepath - logger.info( - f"Downloading NDVI data for {year}, dekad {dekad} " - f"into {filepath}." - ) + logger.info(f"Downloading NDVI data for {dekad} into {filepath}.") # open file within memory zf = ZipFile(BytesIO(resp.read())) @@ -532,7 +511,7 @@ def _process( clobber: bool, gdf: gpd.GeoDataFrame, feature_col: str, - dates_to_process: list, + dates_to_process: List[Dekad], stat: str, is_percentile: bool, kwargs, @@ -554,9 +533,8 @@ def _process( if processed_path.is_file(): logger.info( - f"Processing data from {self._start_date[0]}, " - f"dekad {self._start_date[1]} to {self._end_date[0]} " - f"dekad {self._end_date[1]} into {processed_path}." + f"Processing data from {self._start_date}, " + f"to {self._end_date} into {processed_path}. " ) ( @@ -572,10 +550,7 @@ def _process( logger.info( ( "No new {stat} data to process between " - f"{self._start_date[0]}, " - f"dekad {self._start_date[1]} " - f"and {self._end_date[0]}, " - f"dekad {self._end_date[1]}, " + f"{self._start_date} and {self._end_date}, " "set `clobber = True` to re-process this data." ) ) @@ -607,7 +582,7 @@ def _process( return processed_path def _determine_process_dates( - self, clobber: bool, filepath: Path, dates_to_process: list + self, clobber: bool, filepath: Path, dates_to_process: List[Dekad] ) -> Tuple[list, pd.DataFrame]: """Determine dates to process. @@ -617,12 +592,12 @@ def _determine_process_dates( If True, overwrites existing file filepath : Path Filepath to the existing processed file. - dates_to_process : list + dates_to_process : List[Dekad] List of dates to process Returns ------- - Tuple[list, pd.DataFrame] + Tuple[List[Dekad], pd.DataFrame] Returns a list of dates to process, filtered based on clobber, and a data frame of existing data to build upon in processing @@ -641,7 +616,7 @@ def _determine_process_dates( ["year", "dekad", "modified"] ].values.tolist() dates_already_processed = { - tuple(d[0:2]): d[2] for d in dates_already_processed + Dekad(*d[0:2]): d[2] for d in dates_already_processed } if clobber: @@ -658,7 +633,7 @@ def _determine_process_dates( d for d in dates_to_process if d not in list(dates_already_processed.keys()) - or self._get_modified_time(year=d[0], dekad=d[1]) + or self._get_modified_time(dekad=d) > dates_already_processed[d] ] @@ -671,15 +646,13 @@ def _determine_process_dates( df = df.loc[keep_rows] return (dates_to_process, df) - def _get_raw_filename(self, year: int, dekad: int, local: bool) -> str: + def _get_raw_filename(self, dekad: Dekad, local: bool) -> str: """Get raw filename (excluding file type suffix). Parameters ---------- - year : int - 4-digit year dekad : int - Dekad + Dekadal date local : bool If True, returns filepath for local storage, which includes full 4-digit year and _ @@ -694,12 +667,12 @@ def _get_raw_filename(self, year: int, dekad: int, local: bool) -> str: for .tif files stored within the .zip """ if local: - file_year = f"{year:04}_" + file_year = f"{dekad.year:04}_" else: - file_year = f"{year-2000:02}" + file_year = f"{dekad.year-2000:02}" file_name = ( f"{self._datasource_config.area_prefix}{file_year}" - f"{dekad:02}{self._data_variable_suffix}" + f"{dekad.dekad:02}{self._data_variable_suffix}" ) return file_name @@ -725,15 +698,13 @@ def _load(filepath: Path, drop_modified: bool = False): df.drop(["modified"], axis=1, inplace=True) return df - def _get_raw_path(self, year: int, dekad: int, local: bool) -> Path: + def _get_raw_path(self, dekad: Dekad, local: bool) -> Path: """Get raw filepath. Parameters ---------- - year : int - 4-digit year dekad : int - Dekad + Dekadal date local : bool If True, returns filepath for local storage, which includes full 4-digit year and _ @@ -746,10 +717,10 @@ def _get_raw_path(self, year: int, dekad: int, local: bool) -> Path: Path Path to raw file """ - filename = self._get_raw_filename(year=year, dekad=dekad, local=local) + filename = self._get_raw_filename(dekad=dekad, local=local) return self._raw_base_dir / f"{filename}.tif" - def _get_modified_time(self, year: int, dekad: int) -> datetime: + def _get_modified_time(self, dekad: Dekad) -> datetime: """Get modified time of raw file. Used to determine when to re-process @@ -767,7 +738,7 @@ def _get_modified_time(self, year: int, dekad: int) -> datetime: datetime Timestamp of when file was modified. """ - filepath = self._get_raw_path(year=year, dekad=dekad, local=True) + filepath = self._get_raw_path(dekad=dekad, local=True) return datetime.fromtimestamp(filepath.stat().st_mtime) def _get_processed_filename(self, feature_col: str, stat: str) -> str: @@ -829,7 +800,7 @@ def _get_url(self, filename) -> str: # TODO: potentially move from static method to # wider USGS function repository @staticmethod - def _fp_year_dekad(path: Path) -> List[int]: + def _fp_dekad(path: Path) -> Dekad: """Extract year and dekad from filepath. Parameters @@ -845,4 +816,4 @@ def _fp_year_dekad(path: Path) -> List[int]: filename = path.stem # find two groups, first for year second for dekad regex = re.compile(r"(\d{4})_(\d{2})") - return [int(x) for x in regex.findall(filename)[0]] + return Dekad(*[int(x) for x in regex.findall(filename)[0]]) diff --git a/src/ochanticipy/datasources/usgs/ndvi_products.py b/src/ochanticipy/datasources/usgs/ndvi_products.py index 96c8ce5..eda1d22 100644 --- a/src/ochanticipy/datasources/usgs/ndvi_products.py +++ b/src/ochanticipy/datasources/usgs/ndvi_products.py @@ -13,6 +13,8 @@ from datetime import date from typing import Tuple, Union +from kalendar import Dekad + from ochanticipy.config.countryconfig import CountryConfig from ochanticipy.datasources.usgs.ndvi_base import _UsgsNdvi @@ -37,15 +39,16 @@ class UsgsNdviSmoothed(_UsgsNdvi): Start date. Can be passed as a ``datetime.date`` object or a data string in ISO8601 format, and the relevant dekad will be determined. Or pass - directly as year-dekad tuple, e.g. (2020, 1). - If ``None``, ``start_date`` is set to earliest - date with data: 2002, dekad 19. + directly as year-dekad tuple, e.g. (2020, 1) or + ``kalendar.Dekad``. If ``None``, ``start_date`` + is set to earliest date with data: 2002, dekad 19. end_date : _DATE_TYPE, default = None End date. Can be passed as a ``datetime.date`` - object and the relevant dekad will be determined, - as a date string in ISO8601 format, or as a - year-dekad tuple, i.e. (2020, 1). If ``None``, - ``end_date`` is set to ``date.today()``. + object or a data string in ISO8601 format, and + the relevant dekad will be determined. Or pass + directly as year-dekad tuple, e.g. (2020, 1) or + ``kalendar.Dekad``. If ``None``, ``end_date`` is + set to earliest ``date.today``. Examples -------- @@ -76,8 +79,8 @@ class UsgsNdviSmoothed(_UsgsNdvi): def __init__( self, country_config: CountryConfig, - start_date: Union[date, str, Tuple[int, int], None] = None, - end_date: Union[date, str, Tuple[int, int], None] = None, + start_date: Union[date, str, Tuple[int, int], Dekad, None] = None, + end_date: Union[date, str, Tuple[int, int], Dekad, None] = None, ): super().__init__( country_config=country_config, @@ -104,15 +107,16 @@ class UsgsNdviPctMedian(_UsgsNdvi): Start date. Can be passed as a ``datetime.date`` object or a data string in ISO8601 format, and the relevant dekad will be determined. Or pass - directly as year-dekad tuple, e.g. (2020, 1). - If ``None``, ``start_date`` is set to earliest - date with data: 2002, dekad 19. + directly as year-dekad tuple, e.g. (2020, 1) or + ``kalendar.Dekad``. If ``None``, ``start_date`` + is set to earliest date with data: 2002, dekad 19. end_date : _DATE_TYPE, default = None End date. Can be passed as a ``datetime.date`` - object and the relevant dekad will be determined, - as a date string in ISO8601 format, or as a - year-dekad tuple, i.e. (2020, 1). If ``None``, - ``end_date`` is set to ``date.today()``. + object or a data string in ISO8601 format, and + the relevant dekad will be determined. Or pass + directly as year-dekad tuple, e.g. (2020, 1) or + ``kalendar.Dekad``. If ``None``, ``end_date`` is + set to earliest ``date.today``. Examples -------- @@ -143,8 +147,8 @@ class UsgsNdviPctMedian(_UsgsNdvi): def __init__( self, country_config: CountryConfig, - start_date: Union[date, str, Tuple[int, int], None] = None, - end_date: Union[date, str, Tuple[int, int], None] = None, + start_date: Union[date, str, Tuple[int, int], Dekad, None] = None, + end_date: Union[date, str, Tuple[int, int], Dekad, None] = None, ): super().__init__( country_config=country_config, @@ -174,15 +178,16 @@ class UsgsNdviMedianAnomaly(_UsgsNdvi): Start date. Can be passed as a ``datetime.date`` object or a data string in ISO8601 format, and the relevant dekad will be determined. Or pass - directly as year-dekad tuple, e.g. (2020, 1). - If ``None``, ``start_date`` is set to earliest - date with data: 2002, dekad 19. + directly as year-dekad tuple, e.g. (2020, 1) or + ``kalendar.Dekad``. If ``None``, ``start_date`` + is set to earliest date with data: 2002, dekad 19. end_date : _DATE_TYPE, default = None End date. Can be passed as a ``datetime.date`` - object and the relevant dekad will be determined, - as a date string in ISO8601 format, or as a - year-dekad tuple, i.e. (2020, 1). If ``None``, - ``end_date`` is set to ``date.today()``. + object or a data string in ISO8601 format, and + the relevant dekad will be determined. Or pass + directly as year-dekad tuple, e.g. (2020, 1) or + ``kalendar.Dekad``. If ``None``, ``end_date`` is + set to earliest ``date.today``. Examples -------- @@ -213,8 +218,8 @@ class UsgsNdviMedianAnomaly(_UsgsNdvi): def __init__( self, country_config: CountryConfig, - start_date: Union[date, str, Tuple[int, int], None] = None, - end_date: Union[date, str, Tuple[int, int], None] = None, + start_date: Union[date, str, Tuple[int, int], Dekad, None] = None, + end_date: Union[date, str, Tuple[int, int], Dekad, None] = None, ): super().__init__( country_config=country_config, @@ -244,15 +249,16 @@ class UsgsNdviYearDifference(_UsgsNdvi): Start date. Can be passed as a ``datetime.date`` object or a data string in ISO8601 format, and the relevant dekad will be determined. Or pass - directly as year-dekad tuple, e.g. (2020, 1). - If ``None``, ``start_date`` is set to earliest - date with data: 2002, dekad 19. + directly as year-dekad tuple, e.g. (2020, 1) or + `kalendar.Dekad`. If ``None``, ``start_date`` + is set to earliest date with data: 2002, dekad 19. end_date : _DATE_TYPE, default = None End date. Can be passed as a ``datetime.date`` - object and the relevant dekad will be determined, - as a date string in ISO8601 format, or as a - year-dekad tuple, i.e. (2020, 1). If ``None``, - ``end_date`` is set to ``date.today()``. + object or a data string in ISO8601 format, and + the relevant dekad will be determined. Or pass + directly as year-dekad tuple, e.g. (2020, 1) or + `kalendar.Dekad`. If ``None``, ``end_date`` is + set to earliest ``date.today``. Examples -------- @@ -283,8 +289,8 @@ class UsgsNdviYearDifference(_UsgsNdvi): def __init__( self, country_config: CountryConfig, - start_date: Union[date, str, Tuple[int, int], None] = None, - end_date: Union[date, str, Tuple[int, int], None] = None, + start_date: Union[date, str, Tuple[int, int], Dekad, None] = None, + end_date: Union[date, str, Tuple[int, int], Dekad, None] = None, ): super().__init__( country_config=country_config, diff --git a/src/ochanticipy/utils/dates.py b/src/ochanticipy/utils/dates.py index f6a3474..29a5710 100644 --- a/src/ochanticipy/utils/dates.py +++ b/src/ochanticipy/utils/dates.py @@ -1,8 +1,9 @@ """Functions for dealing with dates.""" -import itertools from datetime import date -from typing import List, Tuple, Union, cast +from typing import List, Tuple, Union, cast, overload + +from kalendar import Dekad, Pentad def get_date_from_user_input(input_date: Union[date, str]) -> date: @@ -40,161 +41,114 @@ def get_date_from_user_input(input_date: Union[date, str]) -> date: return input_date -def get_dekadal_date( - input_date: Union[date, str, Tuple[int, int], None], - default_date: Union[date, str, Tuple[int, int], None] = None, -) -> Tuple[int, int]: - """Calculate dekadal date from general input. +@overload +def get_kalendar_date( + kalendar_class: Dekad, + input_date: Union[date, str, Tuple[int, int], Dekad, None], + default_date: Union[date, str, Tuple[int, int], Dekad, None] = None, +) -> Dekad: + ... - Processes input ``input_date`` and returns two - values, the year and dekad. Input can be of - format ``datetime.date``, an ISO8601 date - string, an already calculated ``(year, dekad)`` - format date, or ``None``. If ``None``, - ``default_date`` is returned. ``default_date`` - can also be passed in the above formats. + +@overload +def get_kalendar_date( # type: ignore + kalendar_class: Pentad, + input_date: Union[date, str, Tuple[int, int], Pentad, None], + default_date: Union[date, str, Tuple[int, int], Pentad, None] = None, +) -> Pentad: + ... + + +def get_kalendar_date( + kalendar_class: Union[Dekad, Pentad], + input_date: Union[date, str, Tuple[int, int], Dekad, Pentad, None], + default_date: Union[ + date, str, Tuple[int, int], Dekad, Pentad, None + ] = None, +) -> Union[Dekad, Pentad]: + """Calculate kalendar date from general input. + + Is used for both dekads and pentads coming from + kalendar. """ if input_date is None and default_date is not None: input_date = default_date - # convert date to various values - if not isinstance(input_date, (str, date)): + if isinstance(input_date, kalendar_class): + return input_date + if isinstance(input_date, date): + return kalendar_class.fromdate(input_date) + if isinstance(input_date, str): + return kalendar_class.fromisoformat(input_date) + else: input_tuple = cast(Tuple[int, int], input_date) - if len(input_tuple) == 2: - year, dekad = input_tuple - # assert year-dekad values appropriate, not too strict - if year < 1000 or year > 9999 or dekad < 1 or dekad > 36: - raise ValueError( - f"(year, dekad) tuple ({year}, {dekad}) invalid. " - "Year should be a 4-digit year and dekad between " - "1 and 36." - ) - - else: + try: + return Dekad(*input_tuple) + except (ValueError, TypeError) as e: raise ValueError( - ( - "`date` values for dekadal data " - "should be passed in as " - "`datetime.date` objects, tuples " - "of `(year, dekad)` format, or " - "ISO8601 date strings." - ) - ) + "`date` values for dekad or pentad date " + "should be passed in as " + "`datetime.date` or `kalendar.Dekad/Pentad` " + "objects, tuples " + "of `(year, dekad)` format, or " + "ISO8601 date strings." + ) from e - else: - input_as_date = get_date_from_user_input(input_date) - year, dekad = date_to_dekad(input_as_date) - return year, dekad - - -def dekad_to_date(dekad: Tuple[int, int]) -> date: - """Compute date from dekad and year. - - Date computed from dekad and year in - datetime object, corresponding to - first day of the dekad. This - is based on the - `common dekadal definition - `_ - of the 1st and 2nd dekad of a month - being the first 10 day periods, and - the 3rd dekad being the remaining - days within that month. - """ - year = dekad[0] - month = ((dekad[1] - 1) // 3) + 1 - day = 10 * ((dekad[1] - 1) % 3) + 1 - return date(year=year, month=month, day=day) - - -def date_to_dekad(date_obj: date) -> Tuple[int, int]: - """Compute dekad and year from date. - - Dekad computed from date. This - is based on the - `common dekadal definition - `_ - of the 1st and 2nd dekad of a month - being the first 10 day periods, and - the 3rd dekad being the remaining - days within that month. - """ - year = date_obj.year - dekad = min((date_obj.day - 1) // 10, 2) + ((date_obj.month - 1) * 3) + 1 - return (year, dekad) - - -def compare_dekads_lt( - dekad1: Tuple[int, int], dekad2: Tuple[int, int] -) -> bool: - """Is year1/dekad1 less than year2/dekad2. +def get_dekadal_date( + input_date: Union[date, str, Tuple[int, int], Dekad, None], + default_date: Union[date, str, Tuple[int, int], Dekad, None] = None, +) -> Dekad: + """Calculate dekadal date from general input. - Compare two pairs of years and dekads, - that the first pair are less than the - second pair. + Processes input ``input_date`` and returns two + values, the year and dekad. Input can be of + format ``datetime.date``, an ISO8601 date + string, an already calculated ``(year, dekad)`` + tuple, ``kalendar.Dekad`` object, or ``None``. If ``None``, + ``default_date`` is returned. ``default_date`` + can also be passed in the above formats. """ - y1, d1 = dekad1 - y2, d2 = dekad2 - return y1 < y2 or ((y1 == y2) and (d1 < d2)) + return get_kalendar_date( + kalendar_class=Dekad, input_date=input_date, default_date=default_date + ) -def compare_dekads_lte( - dekad1: Tuple[int, int], dekad2: Tuple[int, int] -) -> bool: - """Is year1/dekad1 less than or equal to year2/dekad2. +def get_pentadal_date( + input_date: Union[date, str, Tuple[int, int], Pentad, None], + default_date: Union[date, str, Tuple[int, int], Pentad, None] = None, +) -> Pentad: + """Calculate kalendar date from general input. - Compare two pairs of years and dekads, - that the first pair are less than or - equal to the second pair. + Processes input ``input_date`` and returns two + values, the year and pentad. Input can be of + format ``datetime.date``, an ISO8601 date + string, an already calculated ``(year, pentad)`` + tuple, ``kalendar.Pentad`` object, or ``None``. If ``None``, + ``default_date`` is returned. ``default_date`` + can also be passed in the above formats. """ - y1, d1 = dekad1 - y2, d2 = dekad2 - return y1 < y2 or ((y1 == y2) and (d1 <= d2)) - - -def compare_dekads_gt( - dekad1: Tuple[int, int], dekad2: Tuple[int, int] -) -> bool: - """Is year1/dekad1 greater than year2/dekad2. + return get_kalendar_date( + kalendar_class=Pentad, input_date=input_date, default_date=default_date + ) - Compare two pairs of years and dekads, - that the first pair are greater than the - second pair. - """ - return compare_dekads_lt(dekad1=dekad2, dekad2=dekad1) +@overload +def kalendar_range(x: Dekad, y: Dekad) -> List[Dekad]: + ... -def compare_dekads_gte( - dekad1: Tuple[int, int], dekad2: Tuple[int, int] -) -> bool: - """Is year1/dekad1 greater than or equal to year2/dekad2. - Compare two pairs of years and dekads, - that the first pair are greater than or - equal to the second pair. - """ - return compare_dekads_lte(dekad1=dekad2, dekad2=dekad1) +@overload +def kalendar_range(x: Pentad, y: Pentad) -> List[Pentad]: # type: ignore + ... -def expand_dekads( - dekad1: Tuple[int, int], dekad2: Tuple[int, int] -) -> List[Tuple[int, int]]: - """Expand for all years/dekads between two dates. +def kalendar_range( + x: Union[Dekad, Pentad], y: Union[Dekad, Pentad] +) -> List[Union[Dekad, Pentad]]: + """Expand between dekads and pentads. - Takes input year and dekads and returns a list - of year/dekad lists. + Takes input dekad or pentad and returns a list + of dekads/pentads. """ - if compare_dekads_gt(dekad1, dekad2): - raise ValueError("`dekad1` must be less than or equal to `dekad2`.") - - y1, d1 = dekad1 - y2, d2 = dekad2 - year_range = range(y1, y2 + 1) - dekad_range = range(1, 37) - date_combos = itertools.product(*[year_range, dekad_range]) - - def valid(y, d): - return not ((y == y1 and d < d1) or (y == y2 and d > d2)) - - return [(y, d) for y, d in date_combos if valid(y, d)] + return [x + i for i in range(y - x + 1)] diff --git a/tests/datasources/test_ndvi.py b/tests/datasources/test_ndvi.py index ec7d192..649b11f 100644 --- a/tests/datasources/test_ndvi.py +++ b/tests/datasources/test_ndvi.py @@ -8,6 +8,7 @@ import pandas as pd import pytest import xarray as xr +from kalendar import Dekad from shapely.geometry import Polygon from ochanticipy import ( @@ -16,6 +17,7 @@ UsgsNdviSmoothed, UsgsNdviYearDifference, ) +from ochanticipy.utils.dates import kalendar_range DATASOURCE_BASE_DIR = "usgs_ndvi" @@ -34,7 +36,6 @@ def mock_ndvi(mock_country_config): } def _mock_ndvi(variable: str = "smoothed"): - ndvi = instantiator[variable]( country_config=mock_country_config, start_date=start_date, @@ -244,10 +245,10 @@ def test_load_if_process_not_called(mock_ndvi): ndvi.load(feature_col="name") -def test_fp_year_dekad(): +def test_fp_dekad(): """Test that year and dekad extracted from FP.""" fp = Path("ea2022_10pct.tif") - assert UsgsNdviPctMedian._fp_year_dekad(fp) == [2022, 10] + assert UsgsNdviPctMedian._fp_dekad(fp) == Dekad(2022, 10) def test_get_url(mock_ndvi): @@ -265,7 +266,7 @@ def test_get_url(mock_ndvi): def test_process_dates_clobber_true(mock_determine_process_dates): """Test process dates clobbers properly.""" - dps = [(2019, 36), (2020, 1), (2020, 2)] + dps = kalendar_range(x=Dekad(2019, 36), y=Dekad(2020, 2)) test_dps, df = mock_determine_process_dates( clobber=True, dates_to_process=dps ) @@ -275,7 +276,7 @@ def test_process_dates_clobber_true(mock_determine_process_dates): def test_process_dates_clobber_false(mock_determine_process_dates): """Test process dates doesn't clobber unnecessarily.""" - dps = [(2019, 36), (2020, 1), (2020, 2)] + dps = kalendar_range(x=Dekad(2019, 36), y=Dekad(2020, 2)) test_dps, df = mock_determine_process_dates( clobber=False, dates_to_process=dps ) @@ -285,7 +286,7 @@ def test_process_dates_clobber_false(mock_determine_process_dates): def test_process_dates_clobber_true_additional(mock_determine_process_dates): """Test process dates clobbers properly.""" - dps = [(2019, 35), (2019, 36), (2020, 1), (2020, 2), (2020, 3)] + dps = kalendar_range(x=Dekad(2019, 35), y=Dekad(2020, 3)) test_dps, df = mock_determine_process_dates( clobber=True, dates_to_process=dps ) @@ -295,7 +296,7 @@ def test_process_dates_clobber_true_additional(mock_determine_process_dates): def test_process_dates_clobber_false_additional(mock_determine_process_dates): """Test process dates doesn't clobber unnecessarily.""" - dps = [(2019, 35), (2019, 36), (2020, 1), (2020, 2), (2020, 3)] + dps = kalendar_range(x=Dekad(2019, 35), y=Dekad(2020, 3)) test_dps, df = mock_determine_process_dates( clobber=False, dates_to_process=dps ) diff --git a/tests/utils/test_dates.py b/tests/utils/test_dates.py index 2bf1c3d..743be17 100644 --- a/tests/utils/test_dates.py +++ b/tests/utils/test_dates.py @@ -3,6 +3,7 @@ from datetime import date, datetime import pytest +from kalendar import Dekad, Pentad from ochanticipy.utils import dates @@ -27,7 +28,7 @@ def test_get_date_value_error(): def test_get_dekadal_date(): """Tests get dekadal date.""" - desired_dekad = (2013, 8) + desired_dekad = Dekad(2013, 8) desired_str = "2013-03-14" assert dates.get_dekadal_date(desired_str) == desired_dekad assert dates.get_dekadal_date(desired_dekad) == desired_dekad @@ -46,44 +47,15 @@ def test_get_dekadal_date_value_error(): with pytest.raises(ValueError): dates.get_dekadal_date("2020-01-1") with pytest.raises(ValueError): - dates.get_dekadal_date((20, 17)) + dates.get_dekadal_date((2017, 37)) -def test_dekad_to_date(): - """Test conversion from dekad to date.""" - assert dates.dekad_to_date((2020, 1)) == date.fromisoformat("2020-01-01") - assert dates.dekad_to_date((2016, 20)) == date.fromisoformat("2016-07-11") - assert dates.dekad_to_date((2024, 36)) == date.fromisoformat("2024-12-21") - - -def test_date_to_dekad(): - """Test conversion from dekad to date.""" - assert dates.date_to_dekad(date.fromisoformat("2020-01-09")) == (2020, 1) - assert dates.date_to_dekad(date.fromisoformat("2016-07-20")) == (2016, 20) - assert dates.date_to_dekad(date.fromisoformat("2024-12-31")) == (2024, 36) - - -def test_compare_dekads(): - """Test comparing dekads.""" - dekad1 = (2019, 36) - dekad2 = (2020, 1) - assert dates.compare_dekads_gt(dekad2, dekad1) - assert dates.compare_dekads_gte(dekad2, dekad1) - assert dates.compare_dekads_lt(dekad1, dekad2) - assert dates.compare_dekads_lte(dekad1, dekad2) - assert dates.compare_dekads_gte(dekad2, dekad2) - assert dates.compare_dekads_lte(dekad2, dekad2) - - -def test_expand_dekads(): +def test_kalendar_range(): """Test expanding dekads.""" - dekad1 = (2019, 33) - dekad2 = (2020, 3) - dekads = dates.expand_dekads(dekad1, dekad2) - assert len(dekads) == 7 - assert dekads[0] == dekad1 - assert dekads[6] == dekad2 - assert dekads[4] == (2020, 1) - # error raised for reverse attempt - with pytest.raises(ValueError): - dates.expand_dekads(dekad2, dekad1) + pentad1 = Pentad(2019, 70) + pentad2 = Pentad(2020, 3) + pentads = dates.kalendar_range(x=pentad1, y=pentad2) + assert len(pentads) == 7 + assert pentads[0] == pentad1 + assert pentads[6] == pentad2 + assert pentads[4] == Pentad(2020, 1)