Skip to content

Commit

Permalink
deleted longer_or_shorter, changed up_or_down, added AS-APR freq to docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Alina Voilova committed May 21, 2024
1 parent 9af1b85 commit c1e3108
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 133 deletions.
11 changes: 0 additions & 11 deletions dev_scripts/check.py

This file was deleted.

3 changes: 2 additions & 1 deletion docs/specialized_topics/dataprep.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ The index must have a frequency (``fr.index.freq``); it must be one of the ones
* ``D``: daily;
* ``MS``: monthly;
* ``QS``: quarterly;
* ``AS``: yearly.
* ``AS``: yearly (starting from January);
* ``AS-APR``: yearly (starting from April).

If the frequency is not set, we can try to make pandas infer it:

Expand Down
5 changes: 2 additions & 3 deletions portfolyo/core/pfline/decorators.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Module with decorators to catch (and possibly correct) common situations."""


import warnings

from ... import tools
Expand All @@ -9,7 +8,7 @@
def assert_longest_allowed_freq(freq):
def decorator(fn):
def wrapped(self, *args, **kwargs):
if tools.freq.longer_or_shorter(self.index.freq, freq) == 1:
if tools.freq.up_or_down(self.index.freq, freq) == 1:
raise ValueError(
"The frequency of the index is too long; longest allowed:"
f" {freq}; passed: {self.index.freq}."
Expand All @@ -24,7 +23,7 @@ def wrapped(self, *args, **kwargs):
def assert_shortest_allowed_freq(freq):
def decorator(fn):
def wrapped(self, *args, **kwargs):
if tools.freq.longer_or_shorter(self.index.freq, freq) == -1:
if tools.freq.up_or_down(self.index.freq, freq) == -1:
raise ValueError(
"The frequency of the index is too short; shortest allowed:"
f" {freq}; passed: {self.index.freq}."
Expand Down
127 changes: 58 additions & 69 deletions portfolyo/tools/freq.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,92 +13,81 @@
FREQUENCIES = ["AS", "QS", "MS", "D", "H", "15T", "AS-APR"]


STANDARD_COMMON_TS = pd.Timestamp("2020-01-01 0:00")
BACKUP_COMMON_TS = pd.Timestamp("2020-02-03 04:05:06")


def up_or_down(
freq_source: str, freq_target: str, common_ts: pd.Timestamp = None
) -> int:
"""
Compare source frequency with target frequency to see if it needs up- or downsampling.
Upsampling means that the number of values increases - one value in the source
corresponds to multiple values in the target.
Parameters
----------
freq_source, freq_target : frequencies to compare.
common_ts : timestamp, optional
Timestamp to use as anchor from which to compare the two.
Returns
-------
* 1 if source frequency must be upsampled to obtain (i.e, is longer than) target frequency.
* 0 if source frequency is same as target frequency.
* -1 if source frequency must be downsampled to obtain (i.e, is shorter than) target frequency.
Notes
-----
Arbitrarily using a time point as anchor to calculate the length of the time period
from. May have influence on the ratio (duration of a month, quarter, year etc are
influenced by this), but, for most common frequencies, not on which is longer.
Examples
--------
Compare source frequency with target frequency to see if it needs up- or downsampling.
Upsampling means that the number of values increases - one value in the source
corresponds to multiple values in the target.
Parameters
----------
freq_source, freq_target : frequencies to compare.
common_ts : timestamp, optional
Timestamp to use as anchor from which to compare the two.
Returns
-------
* 1 if source frequency must be upsampled to obtain (i.e., is longer than) target
frequency. There is a 1:n relationship between the number of values.
* 0 if source frequency is the same as target frequency. There is a 1:1 relationship
between the number of values (i.e., there is no change).
* -1 if source frequency must be downsampled to obtain (i.e., is shorter than)
target frequency. There is an n:1 relationship between the number of values.
Notes
-----
An arbitrary time point is used as the anchor from which the length of each period
is calculated. The choice of anchor may influence the ratio (the duration of a month,
quarter, year, etc. depends on it) but, for most common frequencies, not which one is longer.
If no 1:n, 1:1 or n:1 mapping exists (e.g., when mapping between AS and AS-APR),
a ValueError is raised.
Examples
--------
>>> freq.up_or_down('D', 'MS')
-1
-1
>>> freq.up_or_down('MS', 'D')
1
1
>>> freq.up_or_down('MS', 'MS')
0
0
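>>> freq.up_or_down('AS', 'AS-APR')
Traceback (most recent call last):
    ...
ValueError: No 1:1, 1:n, or n:1 mapping exists between source and target frequency.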
"""
standard_common_ts = pd.Timestamp("2020-01-01 0:00")
backup_common_ts = pd.Timestamp("2020-02-03 04:05:06")
if common_ts is None:
common_ts = standard_common_ts
ts1 = common_ts + pd.tseries.frequencies.to_offset(freq_source)
ts2 = common_ts + pd.tseries.frequencies.to_offset(freq_target)
common_ts = STANDARD_COMMON_TS

freq_source_as_offset = pd.tseries.frequencies.to_offset(freq_source)
freq_target_as_offset = pd.tseries.frequencies.to_offset(freq_target)
# Check if they are of the same base frequency but different subtypes
if (
type(freq_source_as_offset) is type(freq_target_as_offset)
and freq_source_as_offset != freq_target_as_offset
and freq_source_as_offset.n == 1
and freq_target_as_offset.n == 1
): # catch AS and AS-APR case
raise ValueError(
"No 1:1, 1:n, or n:1 mapping exists between source and target frequency."
)

ts1 = common_ts + freq_source_as_offset
ts2 = common_ts + freq_target_as_offset
if ts1 > ts2:
return 1
elif ts1 < ts2:
return -1
if common_ts == standard_common_ts:
if common_ts == STANDARD_COMMON_TS:
# If they are the same, try with another timestamp.
return up_or_down(freq_source, freq_target, backup_common_ts)
return up_or_down(freq_source, freq_target, BACKUP_COMMON_TS)
return 0 # only if both give the same answer.


def longer_or_shorter(freq: str, freq_ref: str, common_ts: pd.Timedelta = None) -> int:
"""
Compare frequency with reference frequency to see if it is longer or shorter.
Parameters
----------
freq, freq_ref : frequencies to compare.
common_ts : timestamp, optional
Timestamp to use as anchor from which to compare the two.
Returns
-------
* 1 if frequency ``freq`` is longer than the reference frequency ``freq_ref``.
* 0 if frequencies are the same.
* -1 if frequency ``freq`` is shorter than the reference frequency ``freq_ref``.
Notes
-----
Arbitrarily using a time point as anchor to calculate the length of the time period
from. May have influence on the ratio (duration of a month, quarter, year etc are
influenced by this), but, for most common frequencies, not on which is longer.
Examples
--------
>>> freq.longer_or_shorter('D', 'MS')
-1
>>> freq.longer_or_shorter('MS', 'D')
1
>>> freq.longer_or_shorter('MS', 'MS')
0
"""
return up_or_down(freq, freq_ref, common_ts)


def _longestshortest(shortest: bool, *freqs: str):
"""Determine which frequency denotes the shortest or longest time period."""
common_ts = pd.Timestamp("2020-01-01")
Expand Down
4 changes: 2 additions & 2 deletions portfolyo/tools/intersect.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from portfolyo import tools

from portfolyo.tools.right import stamp
from portfolyo.tools.freq import longest, longer_or_shorter
from portfolyo.tools.freq import longest, up_or_down
from datetime import datetime


Expand Down Expand Up @@ -120,7 +120,7 @@ def indices_flex(
if len(distinct_sod) != 1 and ignore_start_of_day is False:
raise ValueError(f"Indices must have equal start-of-day; got {distinct_sod}.")
for i in range(len(idxs)):
if len(distinct_sod) != 1 and longer_or_shorter(idxs[i].freq, "D") == -1:
if len(distinct_sod) != 1 and up_or_down(idxs[i].freq, "D") == -1:
raise ValueError(
"Downsample all indices to daily-or-longer, or trim them so they have the same start-of-day, before attempting to calculate the intersection"
)
Expand Down
2 changes: 1 addition & 1 deletion portfolyo/visualize/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def get_portfolyo_attr(ax, name, default_val=None):

def is_categorical(s: pd.Series) -> bool:
"""The function checks whether the frequency of a pandas Series falls into the continuous or categorical group."""
return tools_freq.longer_or_shorter(s.index.freq, "D") == 1
return tools_freq.up_or_down(s.index.freq, "D") == 1


def prepare_ax_and_s(ax: plt.Axes, s: pd.Series, unit=None) -> pd.Series:
Expand Down
50 changes: 5 additions & 45 deletions tests/core/pfline/test_flat.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,14 @@ def test_flatpfline_asfreqcorrect2(freq, newfreq, columns, tz):
a, (m, d) = a + 3, np.array([1, 1]) + np.random.randint(0, 12, 2) # each + 0..11
end = f"{a}-{m}-{d}"

# Filter out invalid combinations
if (freq == "AS" and newfreq == "AS-APR") or (freq == "AS-APR" and newfreq == "AS"):
pytest.skip("Skipping invalid combination of freq and newfreq")

i = pd.date_range(start, end, freq=freq, inclusive="left", tz=tz)
df = dev.get_dataframe(i, columns)
pfl1 = create.flatpfline(df)
pfl2 = pfl1.asfreq(newfreq)
try:
pfl2 = pfl1.asfreq(newfreq)
# Catch invalid combinations
except ValueError:
return

# Compare the dataframes, only keep time intervals that are in both objects.
if pfl1.kind is Kind.PRICE:
Expand All @@ -141,46 +141,6 @@ def test_flatpfline_asfreqcorrect2(freq, newfreq, columns, tz):
testing.assert_series_equal(df1.apply(np.sum), df2.apply(np.sum))


@pytest.mark.parametrize("tz", [None, "Europe/Berlin"])
@pytest.mark.parametrize("kind", [Kind.COMPLETE, Kind.VOLUME, Kind.PRICE])
@pytest.mark.parametrize("childcount", [0, 1, 2, 3])
def test_pfline_asfreq_apr(
kind: Kind,
tz: str,
childcount: int,
):
index = pd.date_range(
"2020-01-01", "2023-01-01", freq="AS", tz=tz, inclusive="left"
)
index2 = pd.date_range(
"2020-04-01", "2022-04-01", freq="AS-APR", tz=tz, inclusive="left"
)
pfl = dev.get_pfline(index, childcount=childcount, kind=kind)
pfl_as = pfl.asfreq("AS-APR")
new_pfl = dev.get_pfline(index2, childcount=childcount, kind=kind)
testing.assert_index_equal(pfl_as.index, new_pfl.index)


@pytest.mark.parametrize("tz", [None, "Europe/Berlin"])
@pytest.mark.parametrize("kind", [Kind.COMPLETE, Kind.VOLUME, Kind.PRICE])
@pytest.mark.parametrize("childcount", [0, 1, 2, 3])
def test_pfline_as_apr_to_as(
kind: Kind,
tz: str,
childcount: int,
):
index = pd.date_range(
"2020-04-01", "2023-04-01", freq="AS-APR", tz=tz, inclusive="left"
)
index2 = pd.date_range(
"2021-01-01", "2023-01-01", freq="AS", tz=tz, inclusive="left"
)
pfl = dev.get_pfline(index, childcount=childcount, kind=kind)
pfl_as = pfl.asfreq("AS")
new_pfl = dev.get_pfline(index2, childcount=childcount, kind=kind)
testing.assert_index_equal(pfl_as.index, new_pfl.index)


@pytest.mark.parametrize("freq", ["15T", "H", "D"])
@pytest.mark.parametrize("newfreq", ["MS", "QS", "AS", "AS-APR"])
@pytest.mark.parametrize("kind", [Kind.COMPLETE, Kind.VOLUME, Kind.PRICE])
Expand Down
1 change: 0 additions & 1 deletion tests/tools/test_freq.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def test_frequpordown_freqlongerorshorter(freq1, freq2):
i2 = freqs_small_to_large.index(freq2)
outcome = np.sign(i1 - i2)
assert tools.freq.up_or_down(freq1, freq2) == outcome
assert tools.freq.longer_or_shorter(freq1, freq2) == outcome


@pytest.mark.parametrize("tz", [None, "Europe/Berlin", "Asia/Kolkata"])
Expand Down

0 comments on commit c1e3108

Please sign in to comment.