From a977bf5cb5457f23e7b984b27fc4afffafa41991 Mon Sep 17 00:00:00 2001 From: Alina Voilova Date: Mon, 15 Apr 2024 10:23:26 +0200 Subject: [PATCH] added valuerror for sod with less than daily freq --- dev_scripts/intersect_test copy 2.py | 79 ++++++++++++++++++++++++++++ portfolyo/tools/intersect.py | 24 ++++----- tests/tools/test_intersect_flex.py | 8 ++- 3 files changed, 96 insertions(+), 15 deletions(-) create mode 100644 dev_scripts/intersect_test copy 2.py diff --git a/dev_scripts/intersect_test copy 2.py b/dev_scripts/intersect_test copy 2.py new file mode 100644 index 0000000..8210189 --- /dev/null +++ b/dev_scripts/intersect_test copy 2.py @@ -0,0 +1,79 @@ +import pandas as pd + +from portfolyo import tools + + +def freq(): + # ignore freq + a = pd.date_range( + "2022-04-01", + "2024-07-01", + freq="QS", + inclusive="left", + ) + b = pd.date_range( + "2021-01-01", + "2024-01-01", + freq="D", + inclusive="left", + ) + intersect = tools.intersect.indices_flex(a, b, ignore_freq=True) + print(intersect) + + +def start_day(): + a = pd.date_range( + "2022-04-01 00:00", "2022-05-10 00:00", freq="15T", inclusive="left" + ) + b = pd.date_range( + "2022-04-01 06:00", "2022-07-15 06:00", freq="15T", inclusive="left" + ) + intersect = tools.intersect.indices_flex(a, b, ignore_start_of_day=True) + print(intersect) + + +def tz(): + a = pd.date_range( + "2022-04-01 00:00", + "2022-05-10 00:00", + freq="H", + tz=None, + inclusive="left", + ) + b = pd.date_range( + "2022-04-25 00:00", + "2022-05-15 00:00", + freq="H", + tz="Europe/Berlin", + inclusive="left", + ) + intersect = tools.intersect.indices_flex(a, b, ignore_tz=True) + print(intersect) + + +def all(): + a = pd.date_range( + "2022-01-01 00:00", + "2023-01-01 00:00", + freq="15T", + tz="Asia/Kolkata", + inclusive="left", + ) + b = pd.date_range( + "2022-01-20 06:00", + "2023-01-01 06:00", + freq="H", + tz=None, + inclusive="left", + ) + intersect = tools.intersect.indices_flex( + a, b, ignore_freq=True, ignore_tz=True, ignore_start_of_day=True + ) + # print(a) + print(intersect) + + +# tz() +start_day() +# freq() +# all() diff --git a/portfolyo/tools/intersect.py b/portfolyo/tools/intersect.py index 5e2df66..15d0f17 100644 --- a/portfolyo/tools/intersect.py +++ b/portfolyo/tools/intersect.py @@ -3,7 +3,8 @@ import pandas as pd from portfolyo import tools -from portfolyo.tools.freq import longest +from portfolyo.tools.right import stamp +from portfolyo.tools.freq import longest, longer_or_shorter from datetime import datetime @@ -119,6 +120,11 @@ def indices_flex( distinct_sod = set([i[0].time() for i in idxs]) if len(distinct_sod) != 1 and ignore_start_of_day is False: raise ValueError(f"Indices must have equal start-of-day; got {distinct_sod}.") + for i in range(len(idxs)): + if len(distinct_sod) != 1 and longer_or_shorter(idxs[i].freq, "D") == -1: + raise ValueError( + "Downsample all indices to daily-or-longer, or trim them so they have the same start-of-day, before attempting to calculate the intersection" + ) freq, name, tz = [], [], [] for i in range(len(idxs)): @@ -126,16 +132,7 @@ def indices_flex( name.append(idxs[i].name) tz.append(idxs[i].tz) - # add one interval of the respective freq to each index (this way, a given date-range from A-B that was exclusive - # of B is now inclusive of B - this helps when we need to convert frequencies or times-of-day without loosing - # data. At the end, we exclude the end-date of the final result again.) - # idxs = [ - # idx.append( - # pd.DatetimeIndex([idx[-1] + pd.tseries.frequencies.to_offset(idx.freq)]) - # ) - # for idx in idxs - # ] - + longest_freq = freq[0] if ignore_freq is True and len(distinct_freqs) != 1: # Find the longest frequency longest_freq = longest(*freq) @@ -169,8 +166,9 @@ def indices_flex( idxs_out = [] for i in range(len(idxs)): start = min(values) + # end = stamp(start, longest_freq._prefix) end = max(values) - inclusive = "both" + end = stamp(end, longest_freq) if ignore_start_of_day is True: start = datetime.combine(pd.to_datetime(start).date(), start_of_day[i]) @@ -184,7 +182,7 @@ def indices_flex( freq=freq[i], name=name[i], tz=tz[i], - inclusive=inclusive, + inclusive="left", ) ) diff --git a/tests/tools/test_intersect_flex.py b/tests/tools/test_intersect_flex.py index f480d89..29e1281 100644 --- a/tests/tools/test_intersect_flex.py +++ b/tests/tools/test_intersect_flex.py @@ -95,7 +95,7 @@ def test_intersect_flex_ignore_start_of_day( do_test_intersect( "idx", idxs, - expected_startdate, + ValueError if freq == "15T" or freq == "H" else expected_startdate, expected_tz=tz, expected_freq=freq, expected_starttime=starttime, @@ -190,7 +190,11 @@ def test_ignore_all( # indexorframe: str, do_test_intersect( "idx", idxs, - expected_startdate, + ( + ValueError + if freq[0] == "15T" or freq[0] == "H" or freq[1] == "15T" or freq[1] == "H" + else expected_startdate + ), expected_tz=tz, expected_freq=freq[0], expected_starttime=starttime,