diff --git a/docs/savefig/fig_hedge.png b/docs/savefig/fig_hedge.png index 0d1ca13..8efdcad 100644 Binary files a/docs/savefig/fig_hedge.png and b/docs/savefig/fig_hedge.png differ diff --git a/docs/savefig/fig_offtake.png b/docs/savefig/fig_offtake.png index 6b01520..820ee35 100644 Binary files a/docs/savefig/fig_offtake.png and b/docs/savefig/fig_offtake.png differ diff --git a/docs/specialized_topics/dataprep.rst b/docs/specialized_topics/dataprep.rst index 5a4b26c..21912e3 100644 --- a/docs/specialized_topics/dataprep.rst +++ b/docs/specialized_topics/dataprep.rst @@ -69,14 +69,14 @@ Don't worry if our data does not yet have the timezone we want to use in our app Frequency --------- -The index must have a frequency (``fr.index.freq``); it must be one of the ones in ``portfolyo.FREQUENCIES``. The following abbreviations are used by ``pandas`` and throughout this package: +The index must have a frequency (``fr.index.freq``) and it must be a valid frequency. To check validity of the frequency, one may use ``portfolyo.assert_freq_valid()``. The following abbreviations are used by ``pandas`` and throughout this package: * ``15T``: quarterhourly; * ``H``: hourly; * ``D``: daily; * ``MS``: monthly; -* ``QS``: quarterly; -* ``AS``: yearly. +* ``QS``: quarterly. Also allowed ``QS-FEB``, ``QS-MAR``, etc.; +* ``AS``: yearly. Also allowed ``AS-FEB``, ``AS-MAR``, etc. If the frequency is not set, we can try to make pandas infer it: @@ -155,4 +155,4 @@ This function also tries to localize ``fr`` if it is not timezone-aware, and the .. rubric:: Footnotes -.. [#f1] However, there is no harm in doing the localization to the target timezone if it is possible. 
In specific situations, localization is not possible (if we (a) have (quarter)hourly values that we (b) want to localize to a timezone with daylight-savings-time such as "Europe/Berlin" and (c) the moment of the DST-transition is included in the input data) and ``fr.tz_localize()`` raises a ``NonExistentTimeError`` or a ``AmbiguousTimeError``. \ No newline at end of file +.. [#f1] However, there is no harm in doing the localization to the target timezone if it is possible. In specific situations, localization is not possible (if we (a) have (quarter)hourly values that we (b) want to localize to a timezone with daylight-savings-time such as "Europe/Berlin" and (c) the moment of the DST-transition is included in the input data) and ``fr.tz_localize()`` raises a ``NonExistentTimeError`` or a ``AmbiguousTimeError``. diff --git a/docs/tutorial/part1.ipynb b/docs/tutorial/part1.ipynb index 49ae3eb..64c4dbb 100644 --- a/docs/tutorial/part1.ipynb +++ b/docs/tutorial/part1.ipynb @@ -240,27 +240,7 @@ "source": [ "(``portfolyo`` ensures that the values are aggregated correctly. In this case, the price (``p``) values are weighted-averaged (weighted with the duration of each datapoint - in this case a uniform 24h). See [Resampling](../specialized_topics/resampling.rst) for more information.)\n", "\n", - "The argument ``\"QS\"`` specifies that we want quarterly values. The allowed values, in decreasing duration, are in the ``pf.FREQUENCIES`` constant (``\"15T\"`` means quarterhourly):" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['AS', 'QS', 'MS', 'D', 'H', '15T']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pf.FREQUENCIES" + "The argument ``\"QS\"`` specifies that we want quarterly values starting from January (same as ``\"QS-JAN\"``). 
The allowed values, in increasing duration, are the following: ``\"15T\"`` (=quarterhourly), ``\"H\"`` (=hourly), ``\"D\"`` (=daily), ``\"MS\"`` (=monthly), ``\"QS\"`` (=quarterly, or ``\"QS-FEB\"``, ``\"QS-MAR\"``, etc.), or ``\"AS\"`` (=yearly, or ``\"AS-FEB\"``, ``\"AS-MAR\"``, etc.).\n" ] }, { @@ -809,7 +789,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.10.14" }, "orig_nbformat": 4, "vscode": { diff --git a/portfolyo/__init__.py b/portfolyo/__init__.py index 8fe3f38..6cbdaed 100644 --- a/portfolyo/__init__.py +++ b/portfolyo/__init__.py @@ -13,7 +13,7 @@ from .tools.changefreq import averagable as asfreq_avg from .tools.changefreq import summable as asfreq_sum from .tools.changeyear import characterize_index, map_frame_to_year -from .tools.freq import FREQUENCIES +from .tools.freq import assert_freq_valid from .tools.hedge import hedge from .tools.peakfn import PeakFunction from .tools.peakfn import factory as create_peakfn diff --git a/portfolyo/core/commodity/commodity.py b/portfolyo/core/commodity/commodity.py index 77a284f..168f886 100644 --- a/portfolyo/core/commodity/commodity.py +++ b/portfolyo/core/commodity/commodity.py @@ -13,10 +13,11 @@ class Commodity: offset_hours: int = 0 def __post_init__(self): - if self.freq not in (freqs := tools.freq.FREQUENCIES): - raise ValueError( - f"``freq`` must be one of {', '.join(freqs)}; got {self.freq}." - ) + # if self.freq not in (freqs := tools.freq.FREQUENCIES): + # raise ValueError( + # f"``freq`` must be one of {', '.join(freqs)}; got {self.freq}." + # ) + tools.freq.assert_freq_valid(self.freq) power = Commodity( diff --git a/portfolyo/tools/ceil.py b/portfolyo/tools/ceil.py index 552d506..a1cb631 100644 --- a/portfolyo/tools/ceil.py +++ b/portfolyo/tools/ceil.py @@ -21,7 +21,7 @@ def stamp( ---------- ts : pd.Timestamp Timestamp to ceil. 
- freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Frequency for which to ceil the timestamp. future : int, optional (default: 0) 0 to ceil to current period. 1 (-1) to round to period after (before) that, etc. diff --git a/portfolyo/tools/changefreq.py b/portfolyo/tools/changefreq.py index dd6a0f9..f63a0f8 100644 --- a/portfolyo/tools/changefreq.py +++ b/portfolyo/tools/changefreq.py @@ -127,7 +127,7 @@ def _general(is_summable: bool, s: pd.Series, freq: str = "MS") -> pd.Series: True if data is summable, False if it is averagable. s : pd.Series Series that needs to be resampled. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}}, optional (default: 'MS') + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS}, optional (default: 'MS') Target frequency. Returns @@ -172,7 +172,7 @@ def index(i: pd.DatetimeIndex, freq: str = "MS") -> pd.DatetimeIndex: ---------- i : pd.DatetimeIndex Index to resample. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Target frequency. Returns @@ -203,7 +203,7 @@ def summable(fr: Series_or_DataFrame, freq: str = "MS") -> Series_or_DataFrame: ---------- fr : Series or DataFrame Pandas Series or DataFrame to be resampled. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}}, optional (default: 'MS') + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS}, optional (default: 'MS') Target frequency. Returns @@ -239,7 +239,7 @@ def averagable(fr: Series_or_DataFrame, freq: str = "MS") -> Series_or_DataFrame ---------- fr : Series or DataFrame Pandas Series or DataFrame to be resampled. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}}, optional (default: 'MS') + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS}, optional (default: 'MS') Target frequency. 
Returns diff --git a/portfolyo/tools/duration.py b/portfolyo/tools/duration.py index cc5dded..6d773c1 100644 --- a/portfolyo/tools/duration.py +++ b/portfolyo/tools/duration.py @@ -14,7 +14,7 @@ def stamp(ts: pd.Timestamp, freq: str) -> tools_unit.Q_: ---------- ts : pd.Timestamp Timestamp for which to calculate the duration. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Frequency to use in determining the duration. Returns diff --git a/portfolyo/tools/floor.py b/portfolyo/tools/floor.py index 6da0d75..837f64a 100644 --- a/portfolyo/tools/floor.py +++ b/portfolyo/tools/floor.py @@ -21,7 +21,7 @@ def stamp( ---------- ts : pd.Timestamp Timestamp to floor. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Frequency for which to floor the timestamp. future : int, optional (default: 0) 0 to floor to current period. 1 (-1) to round to period after (before) that, etc. diff --git a/portfolyo/tools/freq.py b/portfolyo/tools/freq.py index d2f3e34..679cb2b 100644 --- a/portfolyo/tools/freq.py +++ b/portfolyo/tools/freq.py @@ -7,15 +7,90 @@ from .types import Series_or_DataFrame + # Allowed frequencies. -# Perfect containment; a short-frequency time period always entirely falls within a single high-frequency time period. 
-# AS -> 4 QS; QS -> 3 MS; MS -> 28-31 D; D -> 23-25 H; H -> 4 15T -FREQUENCIES = ["AS", "QS", "MS", "D", "H", "15T"] +ALLOWED_FREQUENCIES_DOCS = "'15T' (=quarterhour), 'H', 'D', 'MS', 'QS' (or 'QS-FEB', 'QS-MAR', etc.), or 'AS' (or 'AS-FEB', 'AS-MAR', etc.)" +ALLOWED_CLASSES = [ + pd.tseries.offsets.YearBegin, + pd.tseries.offsets.QuarterBegin, + pd.tseries.offsets.MonthBegin, + pd.tseries.offsets.Day, + pd.tseries.offsets.Hour, + pd.tseries.offsets.Minute, +] +TO_OFFSET = pd.tseries.frequencies.to_offset +SHORTEST_TO_LONGEST = [ + type(TO_OFFSET(freq)) for freq in ["15T", "H", "D", "MS", "QS", "AS"] +] + +quarter_matrix = [ + ["QS", "QS-APR", "QS-JUL", "QS-OCT"], + ["QS-FEB", "QS-MAY", "QS-AUG", "QS-NOV"], + ["QS-MAR", "QS-JUN", "QS-SEP", "QS-DEC"], +] + + +def assert_freq_valid(freq: str) -> None: + """ + Validate if the given frequency string is allowed based on pandas offset objects. + + Parameters: + freq (str): A string representing a frequency alias (e.g., "AS", "QS", "MS"). + + Raises: + ValueError: If the frequency is not allowed. 
+ """ + + freq_offset = pd.tseries.frequencies.to_offset(freq) + mro_class = freq_offset.__class__.__mro__[0] + + # Check if the MRO is in the list of allowed MROs + # have to make sure it's only the first class on the list + if mro_class not in ALLOWED_CLASSES: + raise ValueError(f"The passed frequency '{freq}' is not allowed.") + + # Define restricted classes that should have n == 1 + restricted_classes = { + pd.tseries.offsets.MonthBegin: 1, + pd.tseries.offsets.Day: 1, + pd.tseries.offsets.Hour: 1, + pd.tseries.offsets.Minute: 15, + } + allowed_n = restricted_classes.get(type(freq_offset)) + if allowed_n is not None: # case where freq is not in restricted class + # Check if freq_offset.n is not None and if it doesn't match allowed_n + if freq_offset.n is None or freq_offset.n != allowed_n: + raise ValueError(f"The passed frequency {freq} is not allowed.") + + +def assert_freq_sufficiently_long(freq, freq_ref, strict: bool = False) -> None: + """ + Compares ``freq`` and ``freq_ref``, raising an AssertionError if ``freq`` is not long enough. + + Parameters + ---------- + freq_source, freq_ref : frequencies to compare. + strict : bool, optional (default: False) + - If ``strict`` is True, ``freq`` must be strictly longer than ``freq_long``. + - If False, it may be equally long. + + """ + # freq should start from the beginning of the year + index_freq = SHORTEST_TO_LONGEST.index(type(TO_OFFSET(freq))) + index_ref = SHORTEST_TO_LONGEST.index(type(TO_OFFSET(freq_ref))) + if strict is True: + if not (index_freq > index_ref): + raise AssertionError( + f"The passed frequency is not sufficiently long; passed {freq}, but should be {freq_ref} or longer." + ) + else: + if not (index_freq >= index_ref): + raise AssertionError( + f"The passed frequency is not sufficiently long; passed {freq}, but should be {freq_ref} or longer." 
+ ) -def up_or_down( - freq_source: str, freq_target: str, common_ts: pd.Timestamp = None -) -> int: +def up_or_down(freq_source: str, freq_target: str) -> int: """ Compare source frequency with target frequency to see if it needs up- or downsampling. @@ -25,8 +100,6 @@ def up_or_down( Parameters ---------- freq_source, freq_target : frequencies to compare. - common_ts : timestamp, optional - Timestamp to use as anchor from which to compare the two. Returns ------- @@ -36,9 +109,7 @@ def up_or_down( Notes ----- - Arbitrarily using a time point as anchor to calculate the length of the time period - from. May have influence on the ratio (duration of a month, quarter, year etc are - influenced by this), but, for most common frequencies, not on which is longer. + If the freq can't be down- or upsampled, throws ValueError. Examples -------- @@ -48,22 +119,94 @@ def up_or_down( 1 >>> freq.up_or_down('MS', 'MS') 0 + >>> freq.up_or_down('QS', 'QS-FEB') + ValueError + + """ + restricted_classes = [ + pd._libs.tslibs.offsets.QuarterBegin, + pd._libs.tslibs.offsets.YearBegin, + ] + # Convert freq from str to offset + freq_source_as_offset = pd.tseries.frequencies.to_offset(freq_source) + freq_target_as_offset = pd.tseries.frequencies.to_offset(freq_target) + + # Compare if the freq are the same + if freq_source_as_offset == freq_target_as_offset: + return 0 + # One of the freq can be in restricted class, but not both + if not ( + type(freq_source_as_offset) in restricted_classes + and type(freq_target_as_offset) in restricted_classes + ): + try: + assert_freq_sufficiently_long(freq_source, freq_target, strict=True) + return 1 + except AssertionError: + return -1 + # If both in restricted class + else: + source_index = restricted_classes.index(type(freq_source_as_offset)) + target_index = restricted_classes.index(type(freq_target_as_offset)) + group_by_month_beginn = ( + freq_source_as_offset.startingMonth + if source_index == 0 + else freq_source_as_offset.month + ) % 3 == ( 
+ freq_target_as_offset.startingMonth + if target_index == 0 + else freq_target_as_offset.month + ) % 3 + + if group_by_month_beginn: + if source_index > target_index: + # we are in case AS and QS + return 1 + elif source_index < target_index: + # we are in the case QS and AS + return -1 + elif source_index == 0: + # we are in the case QS and QS + return 0 + + raise ValueError( + f"The passed frequency {freq_source} can't be aggregated to {freq_target}." + ) + + +def assert_freq_equally_long(freq, freq_ref) -> None: + """ + Compares ``freq`` and ``freq_ref``, raising an AssertionError if ``freq`` is not equally long as ``freq_ref``. + + Parameters + ---------- + freq, freq_ref : frequencies to compare. + Valid examples + -------- + >>> freq.assert_freq_equally_long('QS', 'QS') + or + >>> freq.assert_freq_equally_long('QS', 'QS-APR') + or + >>> freq.assert_freq_equally_long('QS', 'QS-FEB') + + """ + assert_freq_sufficiently_long(freq, freq_ref, strict=False) + assert_freq_sufficiently_long(freq_ref, freq, strict=False) + + +def assert_freq_sufficiently_short(freq, freq_ref, strict: bool = False) -> None: + """ + Compares ``freq`` and ``freq_ref``, raising an AssertionError if ``freq`` is not short enough. + + Parameters + ---------- + freq, freq_ref : frequencies to compare. + strict : bool, optional (default: False) + - If ``strict`` is True, ``freq`` must be strictly shorter than ``freq_ref``. + - If False, it may be equally long, or rather, short. + + """ - standard_common_ts = pd.Timestamp("2020-01-01 0:00") - backup_common_ts = pd.Timestamp("2020-02-03 04:05:06") - if common_ts is None: - common_ts = standard_common_ts - - ts1 = common_ts + pd.tseries.frequencies.to_offset(freq_source) - ts2 = common_ts + pd.tseries.frequencies.to_offset(freq_target) - if ts1 > ts2: - return 1 - elif ts1 < ts2: - return -1 - if common_ts == standard_common_ts: - # If they are the same, try with another timestamp. 
- return up_or_down(freq_source, freq_target, backup_common_ts) - return 0 # only if both give the same answer. + assert_freq_sufficiently_long(freq_ref, freq, strict) def _longestshortest(shortest: bool, *freqs: str): @@ -133,27 +276,25 @@ def to_offset(freq: str) -> pd.Timedelta | pd.DateOffset: >>> freq.to_offset("MS") """ - if freq == "15T": - return pd.Timedelta(hours=0.25) - elif freq == "H": + # Convert the frequency string to an offset object + offset = pd.tseries.frequencies.to_offset(freq) + + # Custom handling for specific simple frequencies + if isinstance(offset, pd.tseries.offsets.Minute) and offset.n == 15: + return pd.Timedelta(minutes=15) + elif isinstance(offset, pd.tseries.offsets.Hour) and offset.n == 1: return pd.Timedelta(hours=1) - elif freq == "D": + elif isinstance(offset, pd.tseries.offsets.Day) and offset.n == 1: return pd.DateOffset(days=1) - elif freq == "MS": + elif isinstance(offset, pd.tseries.offsets.MonthBegin) and offset.n == 1: return pd.DateOffset(months=1) - elif freq == "QS": + elif isinstance(offset, pd.tseries.offsets.QuarterBegin) and offset.n == 1: return pd.DateOffset(months=3) - elif freq == "AS": + elif isinstance(offset, pd.tseries.offsets.YearBegin) and offset.n == 1: return pd.DateOffset(years=1) else: - for freq2 in ["MS", "QS"]: # Edge case: month-/quarterly but starting != Jan. - try: - if up_or_down(freq2, freq) == 0: - return to_offset(freq2) - except ValueError: # freq is not a valid frequency - pass raise ValueError( - f"Parameter ``freq`` must be one of {', '.join(FREQUENCIES)}; got '{freq}'." + f"Parameter ``freq`` must be one of {ALLOWED_FREQUENCIES_DOCS}; got '{freq}'." ) @@ -168,7 +309,7 @@ def from_tdelta(tdelta: pd.Timedelta) -> str: Returns ------- str - One of {', '.join(FREQUENCIES)}. + One of {ALLOWED_FREQUENCIES_DOCS}. 
""" if tdelta == pd.Timedelta(minutes=15): return "15T" @@ -185,69 +326,74 @@ def from_tdelta(tdelta: pd.Timedelta) -> str: else: raise ValueError( f"The timedelta ({tdelta}) doesn't seem to be fit to any of the allowed " - f"frequencies ({', '.join(FREQUENCIES)})." + f"frequencies ({ALLOWED_FREQUENCIES_DOCS})." ) -def set_to_index( - i: pd.DatetimeIndex, wanted: str = None, strict: bool = False -) -> pd.DatetimeIndex: - """Try to read, infer, or force frequency of index. - +def guess_to_index(i: pd.DatetimeIndex) -> pd.DatetimeIndex: + """ "Try to infer the frequency of the index and set it if possible. Parameters ---------- i : pd.DatetimeIndex - wanted : str, optional (default: None) - Frequency to set. If none provided, try to infer. - strict : bool, optional (default: False) - If True, raise ValueError if a valid frequency is not found. - Returns ------- pd.DatetimeIndex - with same values as ``i``, but, if possible, a valid value for ``i.freq``. + DatetimeIndex, with the inferred frequency if possible. """ # Find frequency. - i = i.copy(deep=True) if i.freq: - pass - elif wanted: - i.freq = wanted - else: - try: - i.freq = pd.infer_freq(i) - except ValueError: - pass # couldn't find one, e.g. because not enough values - - # Correct if necessary. - freq = i.freq - if not freq and strict: # No frequency found. - raise ValueError("The index does not seem to have a regular frequency.") - elif freq and freq not in FREQUENCIES: - # Edge case: year-/quarterly but starting != Jan. - if up_or_down(freq, "AS") == 0: - i.freq = "AS" # will likely fail - elif up_or_down(freq, "QS") == 0: - i.freq = "QS" # will only succeed if QS-APR, QS-JUL or QS-OCT - elif strict: - raise ValueError( - f"The data has a non-allowed frequency. Must be one of {', '.join(FREQUENCIES)}; found '{freq}'." - ) + return i + # Freq not set. 
+ i = i.copy(deep=True) + + try: + inferred_freq = pd.infer_freq(i) + for row_index in range(len(quarter_matrix)): # Loop through the rows + if ( + inferred_freq in quarter_matrix[row_index] + ): # check if inferred_freq is somewhere in this row + inferred_freq = quarter_matrix[row_index][ + 0 + ] # set inferred_freq to the first value in the row + i.freq = inferred_freq + + except ValueError: + pass # Couldn't find a frequency, e.g., because there are not enough values return i -def set_to_frame( - fr: Series_or_DataFrame, wanted: str = None, strict: bool = False -) -> Series_or_DataFrame: - """Try to read, infer, or force frequency of frame's index. +def guess_to_frame(fr: Series_or_DataFrame) -> Series_or_DataFrame: + """Try to infer the frequency of the frame's index and set it if possible. Parameters ---------- fr : pd.Series or pd.DataFrame - wanted : str, optional - Frequency to set. If none provided, try to infer. - strict : bool, optional (default: False) - If True, raise ValueError if a valid frequency is not found. + + Returns + ------- + pd.Series | pd.DataFrame + Same type as ``fr``, with the inferred frequency if possible. + """ + # Handle non-datetime-indices. + if not isinstance(fr.index, pd.DatetimeIndex): + raise ValueError( + "The data does not have a datetime index and can therefore not have a frequency." + ) + + if fr.index.freq: + return fr + + return fr.set_axis(guess_to_index(fr.index), axis=0) + + +def set_to_frame(fr: Series_or_DataFrame, wanted: str) -> Series_or_DataFrame: + """Try to force frequency of frame's index. + + Parameters + ---------- + fr : pd.Series or pd.DataFrame + wanted : str + Frequency to set. Returns ------- @@ -260,5 +406,8 @@ def set_to_frame( "The data does not have a datetime index and can therefore not have a frequency." ) - i = set_to_index(fr.index, wanted, strict) + # Set frequency. 
+ i = fr.index.copy(deep=True) + i.freq = wanted + return fr.set_axis(i, axis=0) diff --git a/portfolyo/tools/isboundary.py b/portfolyo/tools/isboundary.py index 001a5e4..1fe464a 100644 --- a/portfolyo/tools/isboundary.py +++ b/portfolyo/tools/isboundary.py @@ -88,7 +88,7 @@ def stamp(ts: pd.Timestamp, freq: str, start_of_day: dt.time = None) -> bool: ---------- ts : pd.Timestamp Timestamp for which to do the assertion. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Frequency for which to check if the timestamp is a valid start (or end) timestamp. start_of_day : dt.time, optional (default: midnight) Time of day at which daily-or-longer delivery periods start. E.g. if @@ -122,7 +122,7 @@ def index(i: pd.DatetimeIndex, freq: str) -> pd.Series: ---------- ts : pd.Timestamp Timestamp for which to do the assertion. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Frequency for which to check if the timestamp is a valid start (or end) timestamp. Returns diff --git a/portfolyo/tools/right.py b/portfolyo/tools/right.py index b1f10d7..e36b1de 100644 --- a/portfolyo/tools/right.py +++ b/portfolyo/tools/right.py @@ -14,7 +14,7 @@ def stamp(ts: pd.Timestamp, freq: str = None) -> pd.Timestamp: ---------- ts : pd.Timestamp Timestamp for which to calculate the right-bound timestamp. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Frequency to use in determining the right-bound timestamp. Returns diff --git a/portfolyo/tools/righttoleft.py b/portfolyo/tools/righttoleft.py index b85b433..5384352 100644 --- a/portfolyo/tools/righttoleft.py +++ b/portfolyo/tools/righttoleft.py @@ -5,6 +5,7 @@ import pandas as pd from . import freq as tools_freq +from . 
import startofday as tools_startofday def index(i: pd.DatetimeIndex, how: str = "A") -> pd.DatetimeIndex: @@ -35,8 +36,15 @@ def index(i: pd.DatetimeIndex, how: str = "A") -> pd.DatetimeIndex: # a DST-changeover will have missing or repeated timestamps. # If frequency is known, we can use pandas built-in to make leftbound. - if (freq := i.freq) is not None or (freq := pd.infer_freq(i)) is not None: - return i - tools_freq.to_offset(freq) + if i.freq is None: + i = tools_freq.guess_to_index(i) + + if i.freq is not None: + i2 = i.shift(-1) + # HACK: pandas does not give correct result if first timestamp is after DST-transition + if i.freq == "D" and tools_startofday.get(i2) != tools_startofday.get(i): + i2 = i - tools_freq.to_offset(i.freq) + return i2 # Couldn't infer frequency. Try from median timedelta and turn into time offset. offst = tools_freq.to_offset(tools_freq.from_tdelta((i[1:] - i[:-1]).median())) diff --git a/portfolyo/tools/round.py b/portfolyo/tools/round.py index f74caa0..64b59d4 100644 --- a/portfolyo/tools/round.py +++ b/portfolyo/tools/round.py @@ -22,7 +22,7 @@ def stamp_general( fn : {'floor', 'ceil'} ts : pd.Timestamp Timestamp for which to do the rounding. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Frequency for which to round the timestamp. future : int, optional (default: 0) 0 to round to current period. 1 (-1) to round to period after (before) that, etc. @@ -48,7 +48,7 @@ def stamp_current( fn : {'floor', 'ceil'} ts : pd.Timestamp Timestamp for which to do the rounding. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Frequency for which to round the timestamp. start_of_day : dt.time, optional (default: midnight) Time of day at which daily-or-longer delivery periods start. E.g. 
if @@ -169,5 +169,5 @@ def _offset(freq: str, future: int): return pd.offsets.YearBegin(future) else: raise ValueError( - f"Parameter ``freq`` must be one of {', '.join(tools_freq.FREQUENCIES)}; got {freq}." + f"Parameter ``freq`` must be one of {tools_freq.ALLOWED_FREQUENCIES_DOCS}; got {freq}." ) diff --git a/portfolyo/tools/standardize.py b/portfolyo/tools/standardize.py index 054811c..bc5ea1c 100644 --- a/portfolyo/tools/standardize.py +++ b/portfolyo/tools/standardize.py @@ -77,7 +77,7 @@ def frame( # Make sure it has a frequency, i.e., make sure it is tz-aware or tz-agnostic. # Pipeline if frequency not yet found: right -> left -> localize -> tz-aware -> freq - fr = tools_freq.set_to_frame(fr) + fr = tools_freq.guess_to_frame(fr) freq_input, tz_input = fr.index.freq, fr.index.tz # The data may be right-bound. @@ -124,7 +124,9 @@ def frame( # Standardize index name. fr = _standardize_index_name(fr) # After standardizing timezone, the frequency should have been set. - return tools_freq.set_to_frame(fr, freq_input, strict=True) + fr = tools_freq.set_to_frame(fr, freq_input) + tools_freq.assert_freq_valid(fr.index.freq) + return fr def _fix_rightbound(fr, force, tz, floating): @@ -169,10 +171,11 @@ def assert_index_standardized(i: pd.DatetimeIndex, __right: bool = False): freq = i.freq if not freq: raise AssertionError("Index must have frequency set.") - if freq not in (freqs := tools_freq.FREQUENCIES): - raise AssertionError( - f"Index frequency must be one of {', '.join(freqs)}; found '{freq}'." - ) + # if freq not in (freqs := tools_freq.FREQUENCIES): + # raise AssertionError( + # f"Index frequency must be one of {', '.join(freqs)}; found '{freq}'." + # ) + tools_freq.assert_freq_valid(freq) # Check length. 
if not len(i): diff --git a/portfolyo/tools/trim.py b/portfolyo/tools/trim.py index 2322f86..932f3d9 100644 --- a/portfolyo/tools/trim.py +++ b/portfolyo/tools/trim.py @@ -20,7 +20,7 @@ def index(i: pd.DatetimeIndex, freq: str) -> pd.DatetimeIndex: ---------- i : pd.DatetimeIndex The (untrimmed) DatetimeIndex - freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Frequency to trim to. E.g. 'MS' to only keep full months. Returns @@ -64,7 +64,7 @@ def frame(fr: pd.Series | pd.DataFrame, freq: str) -> pd.Series | pd.DataFrame: ---------- fr : Series or DataFrame The (untrimmed) pandas series or dataframe. - freq : {{{', '.join(tools_freq.FREQUENCIES)}}} + freq : {tools_freq.ALLOWED_FREQUENCIES_DOCS} Frequency to trim to. E.g. 'MS' to only keep full months. Returns diff --git a/portfolyo/tools/tzone.py b/portfolyo/tools/tzone.py index 6a2b381..32c2df5 100644 --- a/portfolyo/tools/tzone.py +++ b/portfolyo/tools/tzone.py @@ -17,7 +17,7 @@ DST-transition: - When representing hourly values, there are (correctly) skipped or doubled timestamps. However, the freq cannot be inferred. -- When represinging daily values, all timestamps are present and a freq can be inferred. +- When representing daily values, all timestamps are present and a freq can be inferred. However, not all timestamps have the correct duration (e.g., 24h when day actually has 23h or 25h). @@ -114,7 +114,7 @@ def force_aware( ) # Copy, try to set freq, and store original attributes. - fr = tools_freq.set_to_frame(fr) + fr = tools_freq.guess_to_frame(fr) freq_input, tz_input = fr.index.freq, fr.index.tz if not freq_input: @@ -171,7 +171,7 @@ def force_agnostic(fr: Series_or_DataFrame) -> Series_or_DataFrame: this conversion is probably what we want, regardless of the unit. """ # Copy, try to set freq, and store original attributes. 
- fr = tools_freq.set_to_frame(fr) + fr = tools_freq.guess_to_frame(fr) freq_input, tz_input = fr.index.freq, fr.index.tz if not freq_input: @@ -219,7 +219,7 @@ def _B_to_A(fr: Series_or_DataFrame, *, tz) -> Series_or_DataFrame: def _idx_after_conversion(fr: Series_or_DataFrame, tz) -> pd.DatetimeIndex: - fr = tools_freq.set_to_frame(fr) + fr = tools_freq.guess_to_frame(fr) freq_input = fr.index.freq if not freq_input: raise ValueError("Cannot recalculate values if frequency is not known.") diff --git a/setup.cfg b/setup.cfg index 4e57090..4edd93f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,7 @@ max-line-length = 120 ignore = E501, W503, E202, E226 [tool:pytest] -addopts = --cov=. +#addopts = --cov=. markers = only_on_pr: marks tests as slow (select with -m only_on_pr and deselect with -m "not only_on_pr") pythonpath = ./tests diff --git a/tests/core/pfline/test_flat_helper.py b/tests/core/pfline/test_flat_helper.py index c713f9f..1f80b62 100644 --- a/tests/core/pfline/test_flat_helper.py +++ b/tests/core/pfline/test_flat_helper.py @@ -3,12 +3,25 @@ import pandas as pd import pytest -from portfolyo import dev, testing, tools +from portfolyo import dev, testing from portfolyo.core.pfline import flat_helper +TEST_FREQUENCIES = [ + "AS", + "AS-FEB", + "AS-APR", + "QS", + "QS-FEB", + "QS-APR", + "MS", + "D", + "H", + "15T", +] + @pytest.mark.parametrize("tz", ["Europe/Berlin", None]) -@pytest.mark.parametrize("freq", tools.freq.FREQUENCIES) +@pytest.mark.parametrize("freq", TEST_FREQUENCIES) def test_makedataframe_freqtz(freq, tz): """Test if dataframe can made from data with various timezones and frequencies.""" diff --git a/tests/core/pfline/test_pfline_init.py b/tests/core/pfline/test_pfline_init.py index 62632d9..fdea8a6 100644 --- a/tests/core/pfline/test_pfline_init.py +++ b/tests/core/pfline/test_pfline_init.py @@ -12,6 +12,19 @@ from portfolyo import Kind, PfLine, create, dev from portfolyo.core.pfline import classes +TEST_FREQUENCIES = [ + "15T", + 
"H", + "D", + "MS", + "QS", + "QS-FEB", + "QS-APR", + "AS", + "AS-FEB", + "AS-APR", +] + @dataclass class InitTestcase: @@ -173,7 +186,7 @@ def anyerror(*args): @pytest.mark.only_on_pr -@pytest.mark.parametrize("freq", pf.FREQUENCIES[::2]) +@pytest.mark.parametrize("freq", TEST_FREQUENCIES[::2]) @pytest.mark.parametrize("tz", ["Europe/Berlin", None]) @pytest.mark.parametrize("columns", ["w", "q", "p", "pr", "qr", "pq", "wp", "wr"]) @pytest.mark.parametrize("inputtype", InputTypeA) @@ -213,7 +226,7 @@ def test_init_A( @pytest.mark.only_on_pr -@pytest.mark.parametrize("freq", pf.FREQUENCIES[::2]) +@pytest.mark.parametrize("freq", TEST_FREQUENCIES[::2]) @pytest.mark.parametrize("tz", ["Europe/Berlin", None]) @pytest.mark.parametrize("kind", Kind) @pytest.mark.parametrize("inputtype", InputTypeB) diff --git a/tests/tools/test_freq.py b/tests/tools/test_freq.py index 545e8ba..88e38fb 100644 --- a/tests/tools/test_freq.py +++ b/tests/tools/test_freq.py @@ -5,6 +5,17 @@ from portfolyo import tools freqs_small_to_large = ["T", "5T", "15T", "30T", "H", "2H", "D", "MS", "QS", "AS"] +freqs_small_to_large_valid = [ + "15T", + "H", + "D", + "MS", + "QS", + "QS-FEB", + "AS", + "AS-APR", +] +invalid_freq = ["T", "5T", "2H", "5D", "3MS"] @pytest.mark.parametrize("count", range(1, 30)) @@ -15,15 +26,6 @@ def test_longestshortestfreq(count): assert tools.freq.shortest(*freqs) == freqs_small_to_large[min(indices)] -@pytest.mark.parametrize("freq1", freqs_small_to_large) -@pytest.mark.parametrize("freq2", freqs_small_to_large) -def test_frequpordown(freq1, freq2): - i1 = freqs_small_to_large.index(freq1) - i2 = freqs_small_to_large.index(freq2) - outcome = np.sign(i1 - i2) - assert tools.freq.up_or_down(freq1, freq2) == outcome - - @pytest.mark.parametrize("tz", [None, "Europe/Berlin", "Asia/Kolkata"]) @pytest.mark.parametrize( ("start", "end", "expected"), @@ -82,90 +84,251 @@ def test_fromtdelta_dst(start, end, expected): assert result == expected 
-@pytest.mark.parametrize("indexorframe", ["index", "series"]) @pytest.mark.parametrize( - ("freq", "num", "wanted", "strict", "expected"), + ("freq", "num", "wanted", "expected"), [ # D # . enough - ("D", 10, None, False, "D"), - ("D", 10, None, True, "D"), - ("D", 10, "MS", False, ValueError), - ("D", 10, "MS", True, ValueError), - ("D", 10, "D", False, "D"), - ("D", 10, "D", True, "D"), + ("D", 10, "MS", ValueError), + ("D", 10, "D", "D"), # . too few - ("D", 2, None, False, None), - ("D", 2, None, True, ValueError), - ("D", 2, "MS", False, ValueError), - ("D", 2, "MS", True, ValueError), - ("D", 2, "D", False, "D"), - ("D", 2, "D", True, "D"), + ("D", 2, "MS", ValueError), + ("D", 2, "D", "D"), # 15T, too few - ("15T", 2, None, False, None), - ("15T", 2, None, True, ValueError), - ("15T", 2, "MS", False, ValueError), - ("15T", 2, "MS", True, ValueError), - ("15T", 2, "15T", False, "15T"), - ("15T", 2, "15T", True, "15T"), - # invalid freq, not correctable + ("15T", 2, "MS", ValueError), + ("15T", 2, "15T", "15T"), + # invalid freq # . enough - ("2D", 10, None, False, "2D"), - ("2D", 10, None, True, ValueError), - ("2D", 10, "MS", False, ValueError), - ("2D", 10, "MS", True, ValueError), - ("2D", 10, "2D", False, "2D"), - ("2D", 10, "2D", True, ValueError), + ("2D", 10, "MS", ValueError), + ("2D", 10, "2D", "2D"), # . too few - ("2D", 2, None, False, None), - ("2D", 2, None, True, ValueError), - ("2D", 2, "MS", False, ValueError), - ("2D", 2, "MS", True, ValueError), - ("2D", 2, "2D", False, "2D"), - ("2D", 2, "2D", True, ValueError), - # invalid freq, correctable + ("2D", 2, "MS", ValueError), + ("2D", 2, "2D", "2D"), + # uncommon freq # . 
enough - ("QS-APR", 10, None, False, "QS"), - ("QS-APR", 10, None, True, "QS"), - ("QS-APR", 10, "MS", False, ValueError), - ("QS-APR", 10, "MS", True, ValueError), - ("QS-APR", 10, "QS", False, "QS"), - ("QS-APR", 10, "QS", True, "QS"), + ("QS-APR", 10, "MS", ValueError), + ("QS-APR", 10, "QS", "QS"), + ("QS-APR", 10, "QS-FEB", ValueError), + ("QS", 10, "QS-APR", "QS-APR"), # . too few - ("QS-APR", 2, None, False, None), - ("QS-APR", 2, None, True, ValueError), - ("QS-APR", 2, "MS", False, ValueError), - ("QS-APR", 2, "MS", True, ValueError), - ("QS-APR", 2, "QS", False, "QS"), - ("QS-APR", 2, "QS", True, "QS"), + ("QS-APR", 2, "MS", ValueError), + ("QS-APR", 2, "QS", "QS"), + ("QS-APR", 2, "QS-FEB", ValueError), + ("QS", 2, "QS-APR", "QS-APR"), ], ) +@pytest.mark.parametrize("tz", [None, "Europe/Berlin"]) def test_setfreq( freq: str, num: int, wanted: str, - strict: bool, + tz: str, expected: str | Exception, - indexorframe: str, ): - i = pd.date_range("2020", periods=num, freq=freq) + i = pd.date_range("2020", periods=num, freq=freq, tz=tz) i.freq = None - if indexorframe == "index": - inputvalue = i - fn = tools.freq.set_to_index - else: - inputvalue = pd.Series(np.random.rand(num), i) - fn = tools.freq.set_to_frame + inputvalue = pd.Series(np.random.rand(num), i) + # Test. + if isinstance(expected, type) and issubclass(expected, Exception): + with pytest.raises(expected): + _ = tools.freq.set_to_frame(inputvalue, wanted) + return + result = tools.freq.set_to_frame(inputvalue, wanted) + + outputfreq = result.index.freq + assert outputfreq == expected + +@pytest.mark.parametrize( + ("freq", "num", "expected"), + [ + # D + # . enough + ("D", 10, "D"), + # . too few + ("D", 2, None), + # 15T, too few + ("15T", 2, None), + # invalid freq + # . enough + ("2D", 10, "2D"), + # . too few + ("2D", 2, None), + # uncommon freq + # . enough + ("QS-APR", 10, "QS"), + ("QS", 10, "QS"), + ("QS-FEB", 10, "QS-FEB"), + ("QS-MAY", 10, "QS-FEB"), + # . 
too few + ("QS-APR", 2, None), + ("QS", 2, None), + ("AS-FEB", 10, "AS-FEB"), + ], +) +@pytest.mark.parametrize("tz", [None, "Europe/Berlin"]) +def test_guessfreq( + freq: str, + num: int, + tz: str, + expected: str | Exception, +): + i = pd.date_range("2020", periods=num, freq=freq, tz=tz) + i.freq = None + + inputvalue = pd.Series(np.random.rand(num), i) # Test. if isinstance(expected, type) and issubclass(expected, Exception): with pytest.raises(expected): - _ = fn(inputvalue, wanted, strict) + _ = tools.freq.guess_to_frame(inputvalue) return - result = fn(inputvalue, wanted, strict) - if indexorframe == "index": - outputfreq = result.freq - else: - outputfreq = result.index.freq + result = tools.freq.guess_to_frame(inputvalue) + + outputfreq = result.index.freq assert outputfreq == expected + + +# Define your frequencies and their validity +freqs_with_validity = [ + ("15T", True), + ("30T", False), + ("D", True), + ("H", True), + ("MS", True), + ("QS", True), + ("AS", True), + ("AS-APR", True), + ("QS-FEB", True), + ("T", False), + ("5T", False), + ("2H", False), + ("5D", False), + ("3MS", False), +] + + +@pytest.mark.parametrize("freq, is_valid", freqs_with_validity) +def test_freq_validity(freq: str, is_valid: bool): + if is_valid: + # No exception should be raised for valid frequencies + tools.freq.assert_freq_valid(freq) + else: + # ValueError should be raised for invalid frequencies + with pytest.raises(ValueError): + _ = tools.freq.assert_freq_valid(freq) + + +@pytest.mark.parametrize( + ("freq1", "freq2", "strict", "is_supposed_to_fail"), + [ + ("15T", "15T", False, False), + ("15T", "15T", True, True), + ("H", "15T", True, False), + ("15T", "H", True, True), + ("15T", "H", False, True), + ("MS", "MS", True, True), + ("MS", "MS", False, False), + ("MS", "QS-APR", False, True), + ("QS", "AS", True, True), + ("QS", "QS-APR", False, False), + ("QS-FEB", "QS-APR", True, True), + ("QS-FEB", "QS-APR", False, False), + ("AS", "QS", False, False), + ("QS-APR", 
"AS-APR", False, True), + ], +) +def test_freq_sufficiently_long( + freq1: str, freq2: str, strict: bool, is_supposed_to_fail: bool +): + if is_supposed_to_fail: + with pytest.raises(AssertionError): + _ = tools.freq.assert_freq_sufficiently_long(freq1, freq2, strict) + else: + tools.freq.assert_freq_sufficiently_long(freq1, freq2, strict) + + +@pytest.mark.parametrize( + ("freq1", "freq2", "is_supposed_to_fail"), + [ + ("15T", "15T", False), + ("H", "15T", True), + ("15T", "H", True), + ("MS", "MS", False), + ("MS", "QS-APR", True), + ("QS", "AS", True), + ("QS", "QS-APR", False), + ("QS-FEB", "QS-APR", False), + ("AS", "QS", True), + ("QS-APR", "AS-APR", True), + ("AS-APR", "AS-FEB", False), + ], +) +def test_freq_equally_long(freq1: str, freq2: str, is_supposed_to_fail: bool): + if is_supposed_to_fail: + with pytest.raises(AssertionError): + _ = tools.freq.assert_freq_equally_long(freq1, freq2) + else: + tools.freq.assert_freq_equally_long(freq1, freq2) + + +@pytest.mark.parametrize( + ("freq1", "freq2", "strict", "is_supposed_to_fail"), + [ + ("15T", "15T", False, False), + ("15T", "15T", True, True), + ("H", "15T", True, True), + ("15T", "H", True, False), + ("15T", "H", False, False), + ("MS", "MS", True, True), + ("MS", "MS", False, False), + ("MS", "QS-APR", False, False), + ("QS", "AS", True, False), + ("QS", "QS-APR", False, False), + ("QS-FEB", "QS-APR", True, True), + ("QS-FEB", "QS-APR", False, False), + ("AS", "QS", False, True), + ("QS-APR", "AS-APR", False, False), + ], +) +def test_freq_sufficiently_short( + freq1: str, freq2: str, strict: bool, is_supposed_to_fail: bool +): + if is_supposed_to_fail: + with pytest.raises(AssertionError): + _ = tools.freq.assert_freq_sufficiently_short(freq1, freq2, strict) + else: + tools.freq.assert_freq_sufficiently_short(freq1, freq2, strict) + + +@pytest.mark.parametrize( + ("source_freq", "ref_freq", "expected"), + [ + # downsampling + ("D", "MS", -1), + ("MS", "QS", -1), + ("MS", "QS-APR", -1), + ("QS", 
"AS-APR", -1), + ("QS", "AS", -1), + # upsampling + ("QS", "D", 1), + ("AS-APR", "QS", 1), + # the same + ("MS", "MS", 0), + ("QS", "QS", 0), + ("QS", "QS-APR", 0), + ("QS", "QS-JAN", 0), + # ValueError + ("QS", "QS-FEB", ValueError), + ("QS", "AS-FEB", ValueError), + ("AS-APR", "AS", ValueError), + ("AS-FEB", "QS", ValueError), + ], +) +def test_up_pr_down2(source_freq: str, ref_freq: str, expected: int | Exception): + if isinstance(expected, type) and issubclass(expected, Exception): + with pytest.raises(expected): + tools.freq.up_or_down(source_freq, ref_freq) + else: + result = tools.freq.up_or_down(source_freq, ref_freq) + assert result == expected diff --git a/tests/tools/test_intersect.py b/tests/tools/test_intersect.py index 4a7c6a6..18c1770 100644 --- a/tests/tools/test_intersect.py +++ b/tests/tools/test_intersect.py @@ -5,6 +5,19 @@ from portfolyo import testing, tools +TEST_FREQUENCIES = [ + "AS", + "AS-FEB", + "AS-APR", + "QS", + "QS-FEB", + "QS-APR", + "MS", + "D", + "H", + "15T", +] + COMMON_END = "2022-02-02" TESTCASES = [ # startdates, freq, expected_startdate @@ -146,7 +159,7 @@ def test_intersect_distinctstartofday( @pytest.mark.parametrize("tz", [None, "Europe/Berlin", "Asia/Kolkata"]) @pytest.mark.parametrize("indexorframe", ["idx", "fr"]) -@pytest.mark.parametrize("freq", tools.freq.FREQUENCIES) +@pytest.mark.parametrize("freq", TEST_FREQUENCIES) @pytest.mark.parametrize("starttime", ["00:00", "06:00"]) def test_intersect_nooverlap(indexorframe: str, tz: str, freq: str, starttime: str): """Test if intersection of non-overlapping indices gives correct result.""" diff --git a/tests/tools/test_righttoleft.py b/tests/tools/test_righttoleft.py index 903d259..ae594d5 100644 --- a/tests/tools/test_righttoleft.py +++ b/tests/tools/test_righttoleft.py @@ -32,6 +32,10 @@ ("2020-07-01", 12, "MS", "2020-06-01"), ("2020-04-01", 4, "QS", "2020-01-01"), ("2020-07-01", 4, "QS", "2020-04-01"), + ("2020-04-01", 4, "QS-APR", "2020-01-01"), + ("2020-07-01", 4, 
"QS-APR", "2020-04-01"), + ("2020-05-01", 4, "QS-FEB", "2020-02-01"), + ("2020-08-01", 4, "QS-FEB", "2020-05-01"), # Unnatural-boundary. # Unnatural-boundary timestamps; without DST-transition. ("2020-02-01 01:30", 24, "H", "2020-02-01 00:30"), @@ -112,10 +116,19 @@ def test_righttoleft( start: str, periods: int, freq: str, expected_start: str, tz: str, remove_freq: str ): """Test if index of rightbound timestamps can be make leftbound.""" + # Input. i = pd.date_range(start, periods=periods, freq=freq, tz=tz) - expected = pd.date_range(expected_start, periods=periods, freq=freq, tz=tz) if remove_freq == "remove": i.freq = None + + # Expected output. + if remove_freq == "remove" and freq == "QS-APR": + freq_expected = "QS" # if freq removed, expect 'base case' + else: + freq_expected = freq + expected = pd.date_range(expected_start, periods=periods, freq=freq_expected, tz=tz) + + # Actual output. result = tools.righttoleft.index(i) testing.assert_index_equal(result, expected) diff --git a/tests/tools/test_standardize.py b/tests/tools/test_standardize.py index 66851fd..93ca893 100644 --- a/tests/tools/test_standardize.py +++ b/tests/tools/test_standardize.py @@ -4,6 +4,19 @@ from portfolyo import dev, tools +TEST_FREQUENCIES = [ + "AS", + "AS-FEB", + "AS-APR", + "QS", + "QS-FEB", + "QS-APR", + "MS", + "D", + "H", + "15T", +] + @pytest.mark.parametrize("series_or_df", ["series", "df"]) @pytest.mark.parametrize("bound", ["right", "left"]) @@ -73,6 +86,7 @@ def test_standardize_DST( result = tools.standardize.frame(expected, force, **kw) pd.testing.assert_series_equal(result, expected) # 2: Series. 
+ # series = pd.Series(in_vals, iin) result = tools.standardize.frame( pd.Series(in_vals, iin), force, bound=bound, **kw ) @@ -94,7 +108,7 @@ def test_standardize_DST( @pytest.mark.parametrize("out_tz", [None, "Europe/Berlin"]) @pytest.mark.parametrize("floating", [True, False]) @pytest.mark.parametrize("bound", ["left", "right"]) -@pytest.mark.parametrize("freq", tools.freq.FREQUENCIES) +@pytest.mark.parametrize("freq", TEST_FREQUENCIES) def test_standardize_convert(freq, in_tz, floating, series_or_df, bound, out_tz): """Test raising errors when conversing timezones.""" force = "aware" if out_tz else "agnostic" @@ -138,7 +152,7 @@ def test_standardize_convert(freq, in_tz, floating, series_or_df, bound, out_tz) @pytest.mark.parametrize("in_tz", [None, "Europe/Berlin"]) @pytest.mark.parametrize("floating", [True, False]) @pytest.mark.parametrize("force", ["agnostic", "aware"]) -@pytest.mark.parametrize("freq", [*tools.freq.FREQUENCIES, "Q", "M", "AS-FEB"]) +@pytest.mark.parametrize("freq", TEST_FREQUENCIES) def test_standardize_freq(freq, in_tz, floating, series_or_df, force): """Test raising errors when passing invalid frequencies.""" out_tz = "Europe/Berlin" @@ -150,10 +164,13 @@ def test_standardize_freq(freq, in_tz, floating, series_or_df, force): fr = dev.get_series(i) if series_or_df == "series" else dev.get_dataframe(i) # See if error is raised. 
- if freq not in tools.freq.FREQUENCIES: + try: + tools.freq.assert_freq_valid(freq) + except ValueError: + # freq isn't valid, check that standardize will also raise ValueError with pytest.raises(ValueError): _ = tools.standardize.frame(fr, force, tz=out_tz, floating=floating) - return + return # freq was invalid, and standardize correctly raised ValueError result = tools.standardize.frame(fr, force, tz=out_tz, floating=floating) assert result.index.freq == freq @@ -162,7 +179,7 @@ def test_standardize_freq(freq, in_tz, floating, series_or_df, force): @pytest.mark.parametrize("series_or_df", ["series", "df"]) @pytest.mark.parametrize("remove", ["remove_some", "remove_none"]) @pytest.mark.parametrize("in_tz", [None, "Europe/Berlin"]) -@pytest.mark.parametrize("freq", tools.freq.FREQUENCIES) +@pytest.mark.parametrize("freq", TEST_FREQUENCIES) def test_standardize_gaps(freq, in_tz, remove, series_or_df): """Test raising errors on index with gaps. Don't test timezone-conversion.""" force = "agnostic" if in_tz is None else "aware" @@ -181,7 +198,7 @@ def test_standardize_gaps(freq, in_tz, remove, series_or_df): # See if error is raised. if ( # fr has frequency, but it's a forbidden frequency - (remove == "remove_none" and freq not in tools.freq.FREQUENCIES) + (remove == "remove_none" and freq not in TEST_FREQUENCIES) # fr does not have frequency or (remove == "remove_some") ): diff --git a/tests/tools/test_tzone.py b/tests/tools/test_tzone.py index f4fd775..23ca810 100644 --- a/tests/tools/test_tzone.py +++ b/tests/tools/test_tzone.py @@ -122,7 +122,7 @@ def test_conversionAtoA_fromexcel(aggfreq, tzt_in, tzt_out, seriesordf): def conversion_fn(fr): floating = tzt_in.floating or tzt_out.floating fr_out = tools.tzone._A_to_A(fr, tz=tzt_out.explicit, floating=floating) - return tools.freq.set_to_frame(fr_out, aggfreq) + return fr_out do_test_conversion(aggfreq, tzt_in, tzt_out, seriesordf, conversion_fn)