deleted longer_or_shorter, changed up_or_down, added AS-APR freq to docs

rwijtvliet · May 21, 2024 · c1e3108 · c1e3108
1 parent 9af1b85
commit c1e3108
Show file tree

Hide file tree

Showing 8 changed files with 70 additions and 133 deletions.
diff --git a/dev_scripts/check.py b/dev_scripts/check.py
diff --git a/docs/specialized_topics/dataprep.rst b/docs/specialized_topics/dataprep.rst
@@ -76,7 +76,8 @@ The index must have a frequency (``fr.index.freq``); it must be one of the ones
 * ``D``: daily;
 * ``MS``: monthly;
 * ``QS``: quarterly;
-* ``AS``: yearly.
+* ``AS``: yearly (starting from January);
+* ``AS-APR``: yearly (starting from April);
 
 If the frequency is not set, we can try to make pandas infer it:
 

diff --git a/portfolyo/core/pfline/decorators.py b/portfolyo/core/pfline/decorators.py
@@ -1,6 +1,5 @@
 """Module with decorators to catch (and possibly correct) common situations."""
 
-
 import warnings
 
 from ... import tools
@@ -9,7 +8,7 @@
 def assert_longest_allowed_freq(freq):
     def decorator(fn):
         def wrapped(self, *args, **kwargs):
-            if tools.freq.longer_or_shorter(self.index.freq, freq) == 1:
+            if tools.freq.up_or_down(self.index.freq, freq) == 1:
                 raise ValueError(
                     "The frequency of the index is too long; longest allowed:"
                     f" {freq}; passed: {self.index.freq}."
@@ -24,7 +23,7 @@ def wrapped(self, *args, **kwargs):
 def assert_shortest_allowed_freq(freq):
     def decorator(fn):
         def wrapped(self, *args, **kwargs):
-            if tools.freq.longer_or_shorter(self.index.freq, freq) == -1:
+            if tools.freq.up_or_down(self.index.freq, freq) == -1:
                 raise ValueError(
                     "The frequency of the index is too short; shortest allowed:"
                     f" {freq}; passed: {self.index.freq}."

diff --git a/portfolyo/tools/freq.py b/portfolyo/tools/freq.py
@@ -13,92 +13,81 @@
 FREQUENCIES = ["AS", "QS", "MS", "D", "H", "15T", "AS-APR"]
 
 
+STANDARD_COMMON_TS = pd.Timestamp("2020-01-01 0:00")
+BACKUP_COMMON_TS = pd.Timestamp("2020-02-03 04:05:06")
+
+
 def up_or_down(
     freq_source: str, freq_target: str, common_ts: pd.Timestamp = None
 ) -> int:
     """
-    Compare source frequency with target frequency to see if it needs up- or downsampling.
-
-    Upsampling means that the number of values increases - one value in the source
-    corresponds to multiple values in the target.
-
-    Parameters
-    ----------
-    freq_source, freq_target : frequencies to compare.
-    common_ts : timestamp, optional
-        Timestamp to use as anchor from which to compare the two.
-
-    Returns
-    -------
-    * 1 if source frequency must be upsampled to obtain (i.e, is longer than) target frequency.
-    * 0 if source frequency is same as target frequency.
-    * -1 if source frequency must be downsampled to obtain (i.e, is shorter than) target frequency.
-
-    Notes
-    -----
-    Arbitrarily using a time point as anchor to calculate the length of the time period
-    from. May have influence on the ratio (duration of a month, quarter, year etc are
-    influenced by this), but, for most common frequencies, not on which is longer.
-
-    Examples
-    --------
+        Compare source frequency with target frequency to see if it needs up- or downsampling.
+
+        Upsampling means that the number of values increases - one value in the source
+        corresponds to multiple values in the target.
+
+        Parameters
+        ----------
+        freq_source, freq_target : frequencies to compare.
+        common_ts : timestamp, optional
+            Timestamp to use as anchor from which to compare the two.
+
+        Returns
+        -------
+        * 1 if source frequency must be upsampled to obtain (i.e., is longer than) target
+            frequency. There is a 1:n relationship between the number of values.
+        * 0 if source frequency is the same as target frequency. There is a 1:1 relationship
+            between the number of values (i.e., there is no change).
+        * -1 if source frequency must be downsampled to obtain (i.e., is shorter than)
+            target frequency. There is an n:1 relationship between the number of values.
+
+        Notes
+        -----
+        Arbitrarily using a time point as anchor to calculate the length of the time period
+        from. May have influence on the ratio (duration of a month, quarter, year etc are
+        influenced by this), but, for most common frequencies, not on which is longer.
+        If a 1:n, 1:1 or n:1 mapping does not exist - e.g., when mapping between AS and AS-APR -
+        a ValueError is raised.
+
+        Examples
+        --------
     >>> freq.up_or_down('D', 'MS')
-    -1
+        -1
     >>> freq.up_or_down('MS', 'D')
-    1
+        1
     >>> freq.up_or_down('MS', 'MS')
-    0
+        0
+    >>> freq.up_or_down('AS', 'AS-APR')
+        raises ValueError
     """
-    standard_common_ts = pd.Timestamp("2020-01-01 0:00")
-    backup_common_ts = pd.Timestamp("2020-02-03 04:05:06")
     if common_ts is None:
-        common_ts = standard_common_ts
-    ts1 = common_ts + pd.tseries.frequencies.to_offset(freq_source)
-    ts2 = common_ts + pd.tseries.frequencies.to_offset(freq_target)
+        common_ts = STANDARD_COMMON_TS
+
+    freq_source_as_offset = pd.tseries.frequencies.to_offset(freq_source)
+    freq_target_as_offset = pd.tseries.frequencies.to_offset(freq_target)
+    # Check if they are of the same base frequency but different subtypes
+    if (
+        type(freq_source_as_offset) is type(freq_target_as_offset)
+        and freq_source_as_offset != freq_target_as_offset
+        and freq_source_as_offset.n == 1
+        and freq_target_as_offset.n == 1
+    ):  # catch AS and AS-APR case
+        raise ValueError(
+            "No 1:1, 1:n, or n:1 mapping exists between source and target frequency."
+        )
+
+    ts1 = common_ts + freq_source_as_offset
+    ts2 = common_ts + freq_target_as_offset
     if ts1 > ts2:
         return 1
     elif ts1 < ts2:
         return -1
-    if common_ts == standard_common_ts:
+    if common_ts == STANDARD_COMMON_TS:
         # If they are the same, try with another timestamp.
-        return up_or_down(freq_source, freq_target, backup_common_ts)
+        return up_or_down(freq_source, freq_target, BACKUP_COMMON_TS)
     return 0  # only if both give the same answer.
 
 
-def longer_or_shorter(freq: str, freq_ref: str, common_ts: pd.Timedelta = None) -> int:
-    """
-    Compare frequency with reference frequency to see if it is longer or shorter.
-
-    Parameters
-    ----------
-    freq, freq_ref : frequencies to compare.
-    common_ts : timestamp, optional
-        Timestamp to use as anchor from which to compare the two.
-
-    Returns
-    -------
-    * 1 if frequency ``freq`` is longer than the reference frequency ``freq_ref``.
-    * 0 if frequencies are the same.
-    * -1 if frequency ``freq`` is shorter than the reference frequency ``freq_ref``.
-
-    Notes
-    -----
-    Arbitrarily using a time point as anchor to calculate the length of the time period
-    from. May have influence on the ratio (duration of a month, quarter, year etc are
-    influenced by this), but, for most common frequencies, not on which is longer.
-
-    Examples
-    --------
-    >>> freq.longer_or_shorter('D', 'MS')
-    -1
-    >>> freq.longer_or_shorter('MS', 'D')
-    1
-    >>> freq.longer_or_shorter('MS', 'MS')
-    0
-    """
-    return up_or_down(freq, freq_ref, common_ts)
-
-
 def _longestshortest(shortest: bool, *freqs: str):
     """Determine which frequency denotes the shortest or longest time period."""
     common_ts = pd.Timestamp("2020-01-01")

diff --git a/portfolyo/tools/intersect.py b/portfolyo/tools/intersect.py
@@ -3,7 +3,7 @@
 from portfolyo import tools
 
 from portfolyo.tools.right import stamp
-from portfolyo.tools.freq import longest, longer_or_shorter
+from portfolyo.tools.freq import longest, up_or_down
 from datetime import datetime
 
 
@@ -120,7 +120,7 @@ def indices_flex(
     if len(distinct_sod) != 1 and ignore_start_of_day is False:
         raise ValueError(f"Indices must have equal start-of-day; got {distinct_sod}.")
     for i in range(len(idxs)):
-        if len(distinct_sod) != 1 and longer_or_shorter(idxs[i].freq, "D") == -1:
+        if len(distinct_sod) != 1 and up_or_down(idxs[i].freq, "D") == -1:
             raise ValueError(
                 "Downsample all indices to daily-or-longer, or trim them so they have the same start-of-day, before attempting to calculate the intersection"
             )

diff --git a/portfolyo/visualize/plot.py b/portfolyo/visualize/plot.py
@@ -209,7 +209,7 @@ def get_portfolyo_attr(ax, name, default_val=None):
 
 def is_categorical(s: pd.Series) -> bool:
     """The function checks whether the frequency of a pandas Series falls into the continuous or categorical group"""
-    return tools_freq.longer_or_shorter(s.index.freq, "D") == 1
+    return tools_freq.up_or_down(s.index.freq, "D") == 1
 
 
 def prepare_ax_and_s(ax: plt.Axes, s: pd.Series, unit=None) -> pd.Series:

diff --git a/tests/core/pfline/test_flat.py b/tests/core/pfline/test_flat.py
@@ -113,14 +113,14 @@ def test_flatpfline_asfreqcorrect2(freq, newfreq, columns, tz):
     a, (m, d) = a + 3, np.array([1, 1]) + np.random.randint(0, 12, 2)  # each + 0..11
     end = f"{a}-{m}-{d}"
 
-    # Filter out invalid combinations
-    if (freq == "AS" and newfreq == "AS-APR") or (freq == "AS-APR" and newfreq == "AS"):
-        pytest.skip("Skipping invalid combination of freq and newfreq")
-
     i = pd.date_range(start, end, freq=freq, inclusive="left", tz=tz)
     df = dev.get_dataframe(i, columns)
     pfl1 = create.flatpfline(df)
-    pfl2 = pfl1.asfreq(newfreq)
+    try:
+        pfl2 = pfl1.asfreq(newfreq)
+    # Catch invalid combinations
+    except ValueError:
+        return
 
     # Compare the dataframes, only keep time intervals that are in both objects.
     if pfl1.kind is Kind.PRICE:
@@ -141,46 +141,6 @@ def test_flatpfline_asfreqcorrect2(freq, newfreq, columns, tz):
     testing.assert_series_equal(df1.apply(np.sum), df2.apply(np.sum))
 
 
-@pytest.mark.parametrize("tz", [None, "Europe/Berlin"])
-@pytest.mark.parametrize("kind", [Kind.COMPLETE, Kind.VOLUME, Kind.PRICE])
-@pytest.mark.parametrize("childcount", [0, 1, 2, 3])
-def test_pfline_asfreq_apr(
-    kind: Kind,
-    tz: str,
-    childcount: int,
-):
-    index = pd.date_range(
-        "2020-01-01", "2023-01-01", freq="AS", tz=tz, inclusive="left"
-    )
-    index2 = pd.date_range(
-        "2020-04-01", "2022-04-01", freq="AS-APR", tz=tz, inclusive="left"
-    )
-    pfl = dev.get_pfline(index, childcount=childcount, kind=kind)
-    pfl_as = pfl.asfreq("AS-APR")
-    new_pfl = dev.get_pfline(index2, childcount=childcount, kind=kind)
-    testing.assert_index_equal(pfl_as.index, new_pfl.index)
-
-
-@pytest.mark.parametrize("tz", [None, "Europe/Berlin"])
-@pytest.mark.parametrize("kind", [Kind.COMPLETE, Kind.VOLUME, Kind.PRICE])
-@pytest.mark.parametrize("childcount", [0, 1, 2, 3])
-def test_pfline_as_apr_to_as(
-    kind: Kind,
-    tz: str,
-    childcount: int,
-):
-    index = pd.date_range(
-        "2020-04-01", "2023-04-01", freq="AS-APR", tz=tz, inclusive="left"
-    )
-    index2 = pd.date_range(
-        "2021-01-01", "2023-01-01", freq="AS", tz=tz, inclusive="left"
-    )
-    pfl = dev.get_pfline(index, childcount=childcount, kind=kind)
-    pfl_as = pfl.asfreq("AS")
-    new_pfl = dev.get_pfline(index2, childcount=childcount, kind=kind)
-    testing.assert_index_equal(pfl_as.index, new_pfl.index)
-
-
 @pytest.mark.parametrize("freq", ["15T", "H", "D"])
 @pytest.mark.parametrize("newfreq", ["MS", "QS", "AS", "AS-APR"])
 @pytest.mark.parametrize("kind", [Kind.COMPLETE, Kind.VOLUME, Kind.PRICE])

diff --git a/tests/tools/test_freq.py b/tests/tools/test_freq.py
@@ -24,7 +24,6 @@ def test_frequpordown_freqlongerorshorter(freq1, freq2):
     i2 = freqs_small_to_large.index(freq2)
     outcome = np.sign(i1 - i2)
     assert tools.freq.up_or_down(freq1, freq2) == outcome
-    assert tools.freq.longer_or_shorter(freq1, freq2) == outcome
 
 
 @pytest.mark.parametrize("tz", [None, "Europe/Berlin", "Asia/Kolkata"])