Skip to content

Commit

Permalink
added slice property & tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Alina Voilova committed Feb 6, 2024
1 parent 7453eec commit aeca88b
Show file tree
Hide file tree
Showing 10 changed files with 1,475 additions and 1,225 deletions.
7 changes: 5 additions & 2 deletions docs/core/pfline.rst
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ Index slice

From ``pandas`` we know the ``.loc[]`` property, which allows us to select a slice of an object. This is also implemented for portfolio lines. Currently, it supports entering a slice of timestamps. It is a wrapper around the ``pandas.DataFrame.loc[]`` property, and therefore follows the same convention, with the end point being included in the result.

Another slicing method is implemented with the ``.slice[]`` property. Its improvement over ``.loc[]`` is that ``.slice[]`` uses the more common convention of excluding the end point. This has several advantages, which stem from the fact that, unlike with ``.loc[]``, ``left = pfl.slice[:a]`` and ``right = pfl.slice[a:]`` return portfolio lines that are complements: every timestamp in the original portfolio line is found in either the left or the right slice. This is useful when, e.g., concatenating portfolio lines (see below).

.. exec_code::

# --- hide: start ---
Expand All @@ -261,12 +263,13 @@ From ``pandas`` we know the ``.loc[]`` property which allows us to select a slic
pfl = pf.PfLine(input_df)
# --- hide: stop ---
# continuation of previous code example
pfl.loc['2024':'2025'] # includes 2025
pfl.slice['2024':'2026'] # excludes 2026; 2026 interpreted as timestamp 2026-01-01 00:00:00
# --- hide: start ---
print(pfl.loc['2024':'2025'])
print(pfl.slice['2024':'2026'])
# --- hide: stop ---



Volume-only, price-only or revenue-only
=======================================

Expand Down
1,910 changes: 965 additions & 945 deletions docs/tutorial/part3.ipynb

Large diffs are not rendered by default.

556 changes: 278 additions & 278 deletions docs/tutorial/part4.ipynb

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions portfolyo/core/ndframelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ def loc(self):
a boolean array.)"""
...

@property
@abc.abstractmethod
def slice(self):
    """Create a new instance with a subset of the rows.

    Different from ``loc`` in that the slicing is done with a right-open
    interval, i.e., the end point of the slice is excluded from the result.
    """
    # ``abc.abstractproperty`` is deprecated since Python 3.3; stacking
    # ``property`` on top of ``abc.abstractmethod`` is the supported spelling.
    ...

@abc.abstractmethod
def dataframe(
self, cols: Iterable[str] = None, has_units: bool = True, *args, **kwargs
Expand Down
2 changes: 2 additions & 0 deletions portfolyo/core/pfline/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ class FlatPfLine(PfLine):
hedge_with = prices.Flat.hedge_with
# map_to_year => on child classes
loc = flat_methods.loc
slice = flat_methods.slice
__getitem__ = flat_methods.__getitem__
# __bool__ => on child classes
__eq__ = flat_methods.__eq__
Expand All @@ -300,6 +301,7 @@ class NestedPfLine(PfLine, children.ChildFunctionality):
hedge_with = prices.Nested.hedge_with
map_to_year = nested_methods.map_to_year
loc = nested_methods.loc
slice = nested_methods.slice
__bool__ = nested_methods.__bool__
__eq__ = nested_methods.__eq__

Expand Down
25 changes: 25 additions & 0 deletions portfolyo/core/pfline/flat_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from typing import TYPE_CHECKING, Any

from ... import testing
import pandas as pd
from datetime import timedelta

if TYPE_CHECKING:
from .classes import FlatPfLine
Expand Down Expand Up @@ -31,6 +33,11 @@ def loc(self: FlatPfLine) -> LocIndexer:
return LocIndexer(self)


@property
def slice(self: FlatPfLine) -> SliceIndexer:
    """Return a ``SliceIndexer`` bound to this portfolio line.

    The returned object is meant to be indexed, e.g. ``pfl.slice[start:stop]``.
    """
    indexer = SliceIndexer(self)
    return indexer


class LocIndexer:
"""Helper class to obtain FlatPfLine instance, whose index is subset of original index."""

Expand All @@ -40,3 +47,21 @@ def __init__(self, pfl: FlatPfLine):
def __getitem__(self, arg) -> FlatPfLine:
newdf = self.pfl.df.loc[arg]
return self.pfl.__class__(newdf) # use same (leaf) class


class SliceIndexer:
    """Helper class to obtain FlatPfLine instance, whose index is subset of original index.

    Used as ``pfl.slice[start:stop]``. The end point is excluded from the slice
    (right-open interval), so ``pfl.slice[:a]`` and ``pfl.slice[a:]`` have no
    timestamp in common and together contain every row of the original.
    """

    def __init__(self, pfl: FlatPfLine):
        self.pfl = pfl

    def __getitem__(self, arg) -> FlatPfLine:
        """Return the rows whose timestamp lies in ``[arg.start, arg.stop)``.

        Parameters
        ----------
        arg : slice
            Slice whose ``start`` and ``stop`` are ``None`` (open-ended) or anything
            that ``pandas.to_datetime`` turns into a single timestamp. The ``step``
            of the slice is not used.

        Returns
        -------
        FlatPfLine
            New instance of the same (leaf) class as the original portfolio line.

        Raises
        ------
        TypeError
            If ``arg`` is not a slice.
        """
        if not isinstance(arg, slice):
            raise TypeError(
                f"Expected a slice (e.g. pfl.slice[start:stop]); got {type(arg).__name__}."
            )

        df = self.pfl.df
        index = df.index
        # NOTE(review): ``searchsorted`` assumes the index is sorted ascending -
        # presumably guaranteed for portfolio lines; confirm against constructor.
        first = 0
        if arg.start is not None:
            first = index.searchsorted(self._as_bound(arg.start), side="left")
        last = len(index)
        if arg.stop is not None:
            # side="left": position of first row >= stop, so the stop itself (and
            # anything after it) is excluded exactly, whatever the index resolution.
            last = index.searchsorted(self._as_bound(arg.stop), side="left")

        # Positional slice keeps the rows contiguous, just like a label slice.
        newdf = df.iloc[first:last]
        return self.pfl.__class__(newdf)  # use same (leaf) class

    def _as_bound(self, value) -> pd.Timestamp:
        """Convert a slice bound to a timestamp that is comparable with the index."""
        bound = pd.to_datetime(value)
        tz = getattr(self.pfl.df.index, "tz", None)
        if tz is not None and bound.tzinfo is None:
            # Interpret naive bounds (e.g. the string '2024') in the timezone of the
            # index; a naive timestamp cannot be compared with tz-aware ones.
            bound = bound.tz_localize(tz)
        return bound
17 changes: 17 additions & 0 deletions portfolyo/core/pfline/nested_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ def loc(self: NestedPfLine) -> LocIndexer:
return LocIndexer(self)


@property
def slice(self: NestedPfLine) -> SliceIndexer:
    """Return a ``SliceIndexer`` bound to this nested portfolio line.

    The returned object is meant to be indexed, e.g. ``pfl.slice[start:stop]``.
    """
    indexer = SliceIndexer(self)
    return indexer


class LocIndexer:
"""Helper class to obtain NestedPfLine instance, whose index is subset of original index."""

Expand All @@ -46,3 +51,15 @@ def __init__(self, pfl: NestedPfLine):
def __getitem__(self, arg) -> NestedPfLine:
newchildren = {name: child.loc[arg] for name, child in self.pfl.items()}
return self.pfl.__class__(newchildren)


class SliceIndexer:
    """Helper class to obtain NestedPfLine instance, whose index is subset of original index.

    The work is delegated to the ``.slice`` property of each child, which is
    expected to exclude the end point from the slice.
    """

    def __init__(self, pfl: NestedPfLine):
        self.pfl = pfl

    def __getitem__(self, arg) -> NestedPfLine:
        """Apply ``.slice[arg]`` to every child and build a new instance from the results."""
        sliced = {}
        for name, child in self.pfl.items():
            sliced[name] = child.slice[arg]
        return self.pfl.__class__(sliced)
18 changes: 18 additions & 0 deletions portfolyo/core/pfstate/pfstate.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ def __bool__(self):
def loc(self) -> _LocIndexer: # from ABC
return _LocIndexer(self)

@property
def slice(self) -> _SliceIndexer:  # from ABC
    """Return a ``_SliceIndexer`` bound to this portfolio state.

    The returned object is meant to be indexed, e.g. ``pfs.slice[start:stop]``.
    """
    indexer = _SliceIndexer(self)
    return indexer


class _LocIndexer:
"""Helper class to obtain PfState instance, whose index is subset of original index."""
Expand All @@ -292,3 +296,17 @@ def __getitem__(self, arg) -> PfState:
unsourcedprice = self.pfs.unsourcedprice.loc[arg]
sourced = self.pfs.sourced.loc[arg]
return PfState(offtakevolume, unsourcedprice, sourced)


class _SliceIndexer:
    """Helper class to obtain PfState instance, whose index is subset of original index.

    The offtake volume, unsourced price and sourced portfolio lines are each
    sliced with their own ``.slice`` property (end point excluded).
    """

    def __init__(self, pfs):
        self.pfs = pfs

    def __getitem__(self, arg) -> PfState:
        """Slice all components with ``arg`` and combine them into a new PfState."""
        state = self.pfs
        return PfState(
            state.offtake.volume.slice[arg],
            state.unsourcedprice.slice[arg],
            state.sourced.slice[arg],
        )
91 changes: 91 additions & 0 deletions tests/core/pfline/test_slice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""Test if slice attributes works properly with portfolio line."""

import pytest
import pandas as pd
from portfolyo import dev


@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
@pytest.mark.parametrize("slice_start", ["2021", "2022", "2022-01-02"])
def test_flat_slice_start(slice_start, freq):
    """Test that an open-ended slice from a start point equals the same `.loc[]` selection."""
    idx = pd.date_range("2020", "2024", freq=freq, inclusive="left")
    pfl = dev.get_flatpfline(idx)
    result = pfl.slice[slice_start:]
    expected = pfl.loc[slice_start:]
    assert result == expected


@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
@pytest.mark.parametrize(
    "slice_end",
    [
        # (<param for slice>, <param for loc>)
        ("2021", "2020"),
        ("2022", "2021"),
        ("2021-07", "2021-06"),
        ("2022-01-02", "2022-01-01"),
    ],
)
def test_flat_slice_end(slice_end, freq):
    """Test that `.slice[:end]` (end excluded) equals `.loc[:previous]` (end included)."""
    end_for_slice, end_for_loc = slice_end
    idx = pd.date_range("2020", "2024", freq=freq, inclusive="left")
    pfl = dev.get_flatpfline(idx)
    result = pfl.slice[:end_for_slice]
    expected = pfl.loc[:end_for_loc]
    assert result == expected


@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
@pytest.mark.parametrize(
    "where",
    ["2022", "2022-03", "2022-04-21", "2022-05-23 14:34"],
)
def test_flat_slice_whole(where: str, freq: str):
    """Test that slicing at `where` cuts the pfl in 2 pieces without gap or overlap
    (i.e., each original timestamp ends up in exactly one of the pieces.)
    """
    idx = pd.date_range("2020", "2024", freq=freq, inclusive="left")
    pfl = dev.get_flatpfline(idx)
    before = pfl.slice[:where]
    after = pfl.slice[where:]
    # No gap: together, the pieces contain every original timestamp.
    combined = before.index.union(after.index)
    pd.testing.assert_index_equal(combined, idx)
    # No overlap: the pieces have no timestamp in common.
    shared = before.index.intersection(after.index)
    assert len(shared) == 0


@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
@pytest.mark.parametrize("slice_start", ["2021", "2022", "2022-01-02"])
def test_nested_slice_start(slice_start, freq):
    """Test that an open-ended slice from a start point equals the same `.loc[]` selection."""
    idx = pd.date_range("2020", "2024", freq=freq, inclusive="left")
    pfl = dev.get_nestedpfline(idx)
    result = pfl.slice[slice_start:]
    expected = pfl.loc[slice_start:]
    assert result == expected


@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
@pytest.mark.parametrize(
    "slice_end",
    [
        # (<param for slice>, <param for loc>)
        ("2021", "2020"),
        ("2022", "2021"),
        ("2021-07", "2021-06"),
        ("2022-01-02", "2022-01-01"),
    ],
)
def test_nested_slice_end(slice_end, freq):
    """Test that `.slice[:end]` (end excluded) equals `.loc[:previous]` (end included)."""
    end_for_slice, end_for_loc = slice_end
    idx = pd.date_range("2020", "2024", freq=freq, inclusive="left")
    pfl = dev.get_nestedpfline(idx)
    result = pfl.slice[:end_for_slice]
    expected = pfl.loc[:end_for_loc]
    assert result == expected


@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
@pytest.mark.parametrize(
    "where",
    ["2022", "2022-03", "2022-04-21", "2022-05-23 14:34"],
)
def test_nested_slice_whole(where: str, freq: str):
    """Test that slicing at `where` cuts the pfl in 2 pieces without gap or overlap
    (i.e., each original timestamp ends up in exactly one of the pieces.)
    """
    idx = pd.date_range("2020", "2024", freq=freq, inclusive="left")
    pfl = dev.get_nestedpfline(idx)
    before = pfl.slice[:where]
    after = pfl.slice[where:]
    # No gap: together, the pieces contain every original timestamp.
    combined = before.index.union(after.index)
    pd.testing.assert_index_equal(combined, idx)
    # No overlap: the pieces have no timestamp in common.
    shared = before.index.intersection(after.index)
    assert len(shared) == 0
68 changes: 68 additions & 0 deletions tests/core/pfstate/test_slice_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""Test if slice attributes works properly with portfolio state."""

import pytest
import pandas as pd
from portfolyo import dev


@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
@pytest.mark.parametrize("slice_start", ["2021", "2022", "2022-01-02"])
@pytest.mark.parametrize(
    "slice_end",
    [
        # (<param for slice>, <param for loc>)
        ("2021", "2020"),
        ("2022", "2021"),
        ("2022-01-02", "2022-01-01"),
    ],
)
def test_slice_state(slice_start, slice_end, freq):
    """Test that a (head, tail) pair obtained with `.slice[]` equals the pair from `.loc[]`."""
    end_for_slice, end_for_loc = slice_end
    idx = pd.date_range("2020", "2024", freq=freq)
    pfs = dev.get_pfstate(idx)

    via_slice = [pfs.slice[:end_for_slice], pfs.slice[slice_start:]]
    via_loc = [pfs.loc[:end_for_loc], pfs.loc[slice_start:]]
    assert via_slice == via_loc


@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
@pytest.mark.parametrize("slice_start", ["2021", "2022", "2022-01-02"])
def test_state_slice_start(slice_start, freq):
    """Test that an open-ended slice from a start point equals the same `.loc[]` selection."""
    idx = pd.date_range("2020", "2024", freq=freq, inclusive="left")
    pfs = dev.get_pfstate(idx)
    result = pfs.slice[slice_start:]
    expected = pfs.loc[slice_start:]
    assert result == expected


@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
@pytest.mark.parametrize(
    "slice_end",
    [
        # (<param for slice>, <param for loc>)
        ("2021", "2020"),
        ("2022", "2021"),
        ("2021-07", "2021-06"),
        ("2022-01-02", "2022-01-01"),
    ],
)
def test_state_slice_end(slice_end, freq):
    """Test that `.slice[:end]` (end excluded) equals `.loc[:previous]` (end included)."""
    end_for_slice, end_for_loc = slice_end
    idx = pd.date_range("2020", "2024", freq=freq, inclusive="left")
    pfs = dev.get_pfstate(idx)
    result = pfs.slice[:end_for_slice]
    expected = pfs.loc[:end_for_loc]
    assert result == expected


@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
@pytest.mark.parametrize(
    "where",
    ["2022", "2022-03", "2022-04-21", "2022-05-23 14:34"],
)
def test_state_slice_whole(where: str, freq: str):
    """Test that slicing at `where` cuts the pfs in 2 pieces without gap or overlap
    (i.e., each original timestamp ends up in exactly one of the pieces.)
    """
    idx = pd.date_range("2020", "2024", freq=freq, inclusive="left")
    pfs = dev.get_pfstate(idx)
    before = pfs.slice[:where]
    after = pfs.slice[where:]
    # No gap: together, the pieces contain every original timestamp.
    combined = before.index.union(after.index)
    pd.testing.assert_index_equal(combined, idx)
    # No overlap: the pieces have no timestamp in common.
    shared = before.index.intersection(after.index)
    assert len(shared) == 0

0 comments on commit aeca88b

Please sign in to comment.