Skip to content

Commit

Permalink
Merge branch 'develop' into intersect
Browse files Browse the repository at this point in the history
  • Loading branch information
Alina Voilova committed Apr 9, 2024
2 parents f9fe32b + 47901d6 commit 7577362
Show file tree
Hide file tree
Showing 21 changed files with 3,729 additions and 1,150 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-on-pullreq.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
fail-fast: true
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12"]

steps:
- name: Checkout source
Expand Down
12 changes: 6 additions & 6 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ build:
tools:
python: "3.10"
jobs:
post_create_environment:
# Install poetry
# https://python-poetry.org/docs/#installing-manually

post_install:
- pip install poetry
# Tell poetry to not use a virtual environment
- poetry config virtualenvs.create false
post_install:
# Install dependencies with 'docs' dependency group
# https://python-poetry.org/docs/managing-dependencies/#dependency-groups
- poetry install --with docs
- poetry export -f requirements.txt --without-hashes --without-urls --with docs -o requirements.txt
- pip install -r requirements.txt
#- pip list


sphinx:
configuration: docs/conf.py
Expand Down
1,679 changes: 837 additions & 842 deletions poetry.lock

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions portfolyo/core/pfline/flat_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from typing import TYPE_CHECKING, Any

from portfolyo import tools

from ... import testing
import pandas as pd

Expand Down Expand Up @@ -45,6 +47,13 @@ def __init__(self, pfl: FlatPfLine):

def __getitem__(self, arg) -> FlatPfLine:
    """Select part of the portfolio line with ``.loc`` semantics.

    The selection is taken from the underlying dataframe (``self.pfl.df``) and
    checked with ``tools.standardize.assert_frame_standardized`` before it is
    wrapped in a new object of the same class as ``self.pfl``.

    Raises
    ------
    ValueError
        If the selected frame does not pass the standardization check.
    """
    selection = self.pfl.df.loc[arg]
    try:
        tools.standardize.assert_frame_standardized(selection)
    except AssertionError as err:
        # Callers get a ValueError; the failed assertion stays attached as cause.
        message = "Timeseries not in expected form. See ``portfolyo.standardize()`` for more information."
        raise ValueError(message) from err

    # Use the same (leaf) class as the object we are indexing into.
    leaf_class = self.pfl.__class__
    return leaf_class(selection)


Expand All @@ -63,4 +72,10 @@ def __getitem__(self, arg) -> FlatPfLine:
mask &= self.pfl.index < arg.stop

newdf = self.pfl.df.loc[mask]
try:
tools.standardize.assert_frame_standardized(newdf)
except AssertionError as e:
raise ValueError(
"Timeseries not in expected form. See ``portfolyo.standardize()`` for more information."
) from e
return self.pfl.__class__(newdf) # use same (leaf) class
36 changes: 24 additions & 12 deletions portfolyo/dev/develop.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

import datetime as dt
from typing import Dict, Union, Callable, Tuple
from typing import Callable, Dict, Tuple, Union

import numpy as np
import pandas as pd
Expand All @@ -28,30 +28,42 @@ def get_index(
_seed: int = None,
) -> pd.DatetimeIndex:
"""Get index."""
# Prepare values.
if _seed:
np.random.seed(_seed)
if not periods:
standard_len = INDEX_LEN.get(freq, 10)
periods = np.random.randint(standard_len // 2, standard_len * 2)
if tools.freq.up_or_down(freq, "H") <= 0 and tz is None:
# Shorten index to not include timestamp that do not exist in Europe/Berlin.
periods = min(periods, 4000)
if not startdate:
a, m, d = 2020, 1, 1
a += np.random.randint(-4, 4) if _seed else (periods % 20 - 10)
a, m, d = 2016, 1, 1 # earliest possible
a += np.random.randint(0, 8) if _seed else (periods % 8)
if tools.freq.up_or_down(freq, "MS") <= 0:
m += np.random.randint(0, 12) if _seed else (periods % 12)
if tools.freq.up_or_down(freq, "D") <= 0:
d += np.random.randint(0, 28) if _seed else (periods % 28)
if tools.freq.up_or_down(freq, "H") <= 0 and tz is None:
# Start index after DST-start to not include timestamps that do not exist in Europe/Berlin.
m, d = 4, 2
startdate = f"{a}-{m}-{d}"
if not start_of_day:
start_of_day = dt.time(hour=0, minute=0)
starttime = f"{start_of_day.hour:02}:{start_of_day.minute:02}:00"
start = f"{startdate} {starttime}"
return pd.date_range(start, freq=freq, periods=periods, tz=tz)
# Create index.
start = tools.stamp.create(startdate, tz, start_of_day)
i = pd.date_range(start, periods=periods, freq=freq) # tz included in start
# Some checks.
if tools.freq.up_or_down(freq, "H") <= 0:
i = _shorten_index_if_necessary(i, start_of_day)
return i


def _shorten_index_if_necessary(i, start_of_day) -> pd.DatetimeIndex:
"""Shorten index with (quarter)hourly values if necessary to ensure that an integer
number of calendar days is included."""
if (i[-1] - i[0]).total_seconds() < 23 * 3600:
raise ValueError("Index must contain at least one full day")
# Must ensure that index is integer number of days.
for _ in range(0, 100): # max 100 quarterhours in a day (@ end of DST)
if tools.right.stamp(i[-1], i.freq).time() == start_of_day:
return i
i = i[:-1]
raise ValueError("Can't find timestamp to end index on.")


def get_value(
Expand Down
7 changes: 3 additions & 4 deletions portfolyo/tools/freq.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def set_to_index(
Parameters
----------
i : pd.DatetimeIndex
wanted : str, optional
wanted : str, optional (default: None)
Frequency to set. If none provided, try to infer.
strict : bool, optional (default: False)
If True, raise ValueError if a valid frequency is not found.
Expand Down Expand Up @@ -293,6 +293,5 @@ def set_to_frame(
"The data does not have a datetime index and can therefore not have a frequency."
)

fr = fr.copy()
fr.index = set_to_index(fr.index, wanted, strict)
return fr
i = set_to_index(fr.index, wanted, strict)
return fr.set_axis(i, axis=0)
139 changes: 74 additions & 65 deletions portfolyo/tools/standardize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,44 +8,48 @@
from pytz import AmbiguousTimeError, NonExistentTimeError

from . import freq as tools_freq
from . import right as tools_right
from . import righttoleft as tools_righttoleft
from . import tzone as tools_tzone


# TODO: remove 'Europe/Berlin' as default for ``tz``, use None instead.
def frame(
fr: Union[pd.Series, pd.DataFrame],
force: str = None,
bound: str = "left",
*,
tz: str = "Europe/Berlin",
tz: str = None,
floating: bool = True,
index_col: str = None,
force_freq: str = None,
) -> Union[pd.Series, pd.DataFrame]:
"""Standardize a series or dataframe.
Parameters
----------
fr : pd.Series or pd.DataFrame
force : {'aware', 'agnostic'}, optional (default: None)
Force ``fr`` to be timezone aware or timezone agnostic. If None: keep index
as-is.
Force returned frame to be timezone aware or timezone agnostic. Timezone aware
means that the index has a timezone, like UTC or Europe/Berlin. Timezone agnostic
means that the index shows wall time with 24h days without DST. If None: keep
timezone as-is.
bound : {'left', 'right'}, optional (default: 'left')
If 'left' ('right'), specifies that input timestamps are left-(right-)bound.
If 'right', specifies that input timestamps are right-bound, and will change to
left-bound.
tz : str, optional (default: None)
The timezone in which to interpret non-localized values. If ``force`` ==
'aware': also the timezone to localize to. Ignored if ``force`` is None.
Timezone. Timezone to convert the values into and/or to interpret values in.
- If ``force`` is None, this argument is ignored.
- If the index is timezone agnostic and ``force`` is 'aware', the frame values
are interpreted as being in this timezone. E.g., if 'Europe/Berlin', the final
Sunday in March already has 23h; all that's needed is to set the timezone.
- If the index is timezone agnostic and ``force`` is 'agnostic', the same is
done, but followed with a conversion to timezone-agnostic frame.
- If the index is timezone aware and ``force`` is 'aware', the values are
converted to timezone ``tz``. See parameter ``floating``.
- If the index is timezone aware and ``force`` is 'agnostic', this argument is
ignored.
floating : bool, optional (default: True)
If ``force`` == 'aware': how to convert to ``tz`` if ``fr`` has other timezone.
Keep local time (``floating`` == True) or keep universal time (``floating`` ==
False). Ignored if ``force`` == 'agnostic' or None.
index_col : str, optional
Column to create the timestamp from. Use existing index if none specified.
Ignored if ``fr`` is not a DataFrame.
force_freq : str, optional
If a frequency cannot be inferred from the data (e.g. due to gaps), it is
resampled at this frequency. Default: raise Exception.
Returns
-------
Expand All @@ -57,21 +61,16 @@ def frame(
-----
It is assumed that we are dealing with "time-averable" data, such as values in [MW]
or [Eur/MWh]. This is especially important when converting daily (and longer) values
between a tz-agnostic context and a tz-aware context with DST-transitions.
between a tz-agnostic context and a tz-aware context with DST-transitions. The value
on a 23-hour day is used as-is when converting to a 24-hour day.
See also
--------
``portfolyo.force_aware``
``portfolyo.force_agnostic``
"""
kwargs = {"tz": tz, "floating": floating, "force_freq": force_freq}

# Set index.
if index_col and isinstance(fr, pd.DataFrame):
fr = fr.set_index(index_col)
else:
fr = fr.copy() # don't change passed-in fr
fr.index = pd.DatetimeIndex(fr.index) # turn / try to turn into datetime
fr = fr.set_axis(pd.DatetimeIndex(fr.index)) # turn / try to turn into datetime

# We want to cover 2 additional cases for convenience sake:
# a. The user passes a frame that still needs to be localized (--> freq unknown)
Expand All @@ -85,14 +84,7 @@ def frame(
# The data may be right-bound.

if bound == "right": # right -> left
for how in ["A", "B"]:
try:
fr_left = fr.set_axis(tools_righttoleft.index(fr.index, how))
return frame(fr_left, force, "left", **kwargs)
except ValueError as e:
if how == "B":
raise ValueError("Cannot make this frame left-bound.") from e
pass
return _fix_rightbound(fr, force, tz, floating)

# Now the data is left-bound.
# If the frequency is not found, and it is tz-naive, the index may need to be localized.
Expand All @@ -106,46 +98,23 @@ def frame(
else:
# Could be localized. Again remove localization if force == 'agnostic' or None.
force_to = force or "agnostic"
return frame(fr_aware, force_to, "left", **kwargs)
return frame(fr_aware, force_to, "left", tz=tz, floating=floating)

# All options to infer frequency have been exhausted. One may or may not have been found.
# Does the user want to force a frequency?

if not freq_input and force_freq:
# No freq has been found, but user specifies which freq it should be.
fr_withfreq = fr.asfreq(force_freq)
return frame(fr_withfreq, force, "left", tz=tz, floating=floating)

elif not freq_input and not force_freq:
if not freq_input:
# No freq has been found, and user specifies no freq either.
raise ValueError(
"A frequency could not be inferred for this data. Force a frequency (by passing the"
" ``force_freq`` parameter), or localize the data in advance (with ``fr.tz_localize()``)."
)

elif freq_input and force_freq and force_freq != freq_input:
# Freq has been found, but user specifies it should be a different freq.
raise ValueError(
f"This data seems to have a frequency {freq_input}, which is different from the frequency"
f" the user wants to force on it {force_freq}. Note that the ``force_freq`` parameter is"
" for filling gaps in the input data. It should not be used for resampling! If the"
" data has e.g. daily values but you want monthly values, use ``force_freq='D'``, and"
" pass the return value to one of the functions in the ``portfolyo.tools.changefreq`` module."
"A frequency could not be inferred for this data. This can be because there are"
" gaps in the data (use ``fr.asfreq()`` to replace gaps with NaN), or because"
" the index must be localized (with ``fr.tz_localize()``)."
)

# Now the data has frequency set. It is tz-aware (possibly with wrong tz) or tz-agnostic.
# Now the data has frequency set. It is tz-aware (possibly with wrong tz) or tz-agnostic.

# Fix timezone.
if force == "aware":
fr = tools_tzone.force_aware(fr, tz, floating=floating)
elif force == "agnostic" or force == "naive":
fr = tools_tzone.force_agnostic(fr)
elif force is None: # don't try to fix timezone.
pass
else:
raise ValueError(
f"Parameter ``force`` must be one of 'aware', 'agnostic'; got {force}."
)
fr = _fix_timezone(fr, force, tz, floating)

# Check if index is OK; otherwise raise error.
try:
Expand All @@ -154,9 +123,36 @@ def frame(
raise ValueError("Could not standardize this frame") from e

# Standardize index name.
fr.index.name = "ts_left"
fr = _standardize_index_name(fr)
# After standardizing timezone, the frequency should have been set.
return tools_freq.set_to_frame(fr, freq_input, strict=force_freq)
return tools_freq.set_to_frame(fr, freq_input, strict=True)


def _fix_rightbound(fr, force, tz, floating):
    """Standardize a frame whose index holds right-bound timestamps.

    The index is converted to left-bound with ``tools_righttoleft.index``, trying
    the methods "A" and "B" in that order. The first conversion for which the
    subsequent call to ``frame()`` succeeds is returned.

    Parameters
    ----------
    fr : pd.Series or pd.DataFrame
        Frame with right-bound index.
    force, tz, floating
        Passed on unchanged to ``frame()``.

    Returns
    -------
    pd.Series or pd.DataFrame
        Result of ``frame()`` on the left-bound frame.

    Raises
    ------
    ValueError
        If neither method leads to a frame that can be standardized. The error of
        the last attempt is attached as the cause.
    """
    last_error = None
    for how in ["A", "B"]:
        try:
            i_left = tools_righttoleft.index(fr.index, how)
            fr_left = fr.set_axis(i_left)
            return frame(fr_left, force, "left", tz=tz, floating=floating)
        except ValueError as e:
            # Keep the reason for the failure (``e`` is unbound after this clause)
            # and try the next method.
            last_error = e
    raise ValueError("Cannot make this frame left-bound.") from last_error


def _fix_timezone(fr, force, tz, floating):
if force is None:
return fr
elif force == "aware":
return tools_tzone.force_aware(fr, tz, floating=floating)
elif force == "agnostic" or force == "naive":
return tools_tzone.force_agnostic(fr)
raise ValueError(
f"Parameter ``force`` must be None, 'aware' or 'agnostic'; got {force}."
)


def _standardize_index_name(fr: Union[pd.Series, pd.DataFrame]):
return fr.rename_axis(index="ts_left")


def assert_frame_standardized(fr: Union[pd.Series, pd.DataFrame]):
Expand Down Expand Up @@ -189,7 +185,7 @@ def assert_index_standardized(i: pd.DatetimeIndex, __right: bool = False):
if i[0].minute != startminute:
err = ("right-bound", "15 min past the") if __right else ("", "at a full")
raise AssertionError(
f"An index with {err[0]} quarterhourly values must start {err[1]} hour; found {i[0]}."
f"The first element in an index with {err[0]} quarterhourly values must be {err[1]} hour; found {i[0]}."
)

if any(not_ok := [ts.minute not in (0, 15, 30, 45) for ts in i]):
Expand All @@ -205,7 +201,20 @@ def assert_index_standardized(i: pd.DatetimeIndex, __right: bool = False):
)

# Check time-of-day.
if tools_freq.up_or_down(freq, "D") >= 0:
if tools_freq.up_or_down(freq, "H") <= 0: # hour or shorter
if not __right:
start = i[0]
end = tools_right.stamp(i[-1], i.freq)
else:
start = tools_righttoleft.index(i)[0]
end = i[-1]
if start.time() != end.time():
raise AssertionError(
"An index must contain full days. For hourly-or-shorter values, this means "
f"that the start time of the first period ({start}) must equal the end time of the "
f"last period ({end}), which is not the case."
)
else: # days or longer
if not len(times := set(i.time)) == 1:
raise AssertionError(
"In an index with daily-or-longer values, all timestamps (all periods) should"
Expand Down
Loading

0 comments on commit 7577362

Please sign in to comment.