added slice property & tests

rwijtvliet · Feb 6, 2024 · aeca88b · aeca88b
1 parent 7453eec
commit aeca88b
Show file tree

Hide file tree

Showing 10 changed files with 1,475 additions and 1,225 deletions.
diff --git a/docs/core/pfline.rst b/docs/core/pfline.rst
@@ -252,6 +252,8 @@ Index slice
 
 From ``pandas`` we know the ``.loc[]`` property, which allows us to select a slice of the objects. This is also implemented for portfolio lines. Currently, it supports entering a slice of timestamps. It is a wrapper around the ``pandas.DataFrame.loc[]`` property, and therefore follows the same convention, with the end point being included in the result.
 
+Another slicing method is available through the ``.slice[]`` property. Its advantage over ``.loc[]`` is that ``.slice[]`` follows the more common convention of excluding the end point. As a consequence, and unlike with ``.loc[]``, ``left = pfl.slice[:a]`` and ``right = pfl.slice[a:]`` are complements: every timestamp in the original portfolio line is found in exactly one of the two slices. This is useful when e.g. concatenating portfolio lines (see below).
+
 .. exec_code::
 
    # --- hide: start ---
@@ -261,12 +263,13 @@ From ``pandas`` we know the ``.loc[]`` property which allows us to select a slic
    pfl = pf.PfLine(input_df)
    # --- hide: stop ---
    # continuation of previous code example
-   pfl.loc['2024':'2025']  # includes 2025
+   pfl.slice['2024':'2026']  # excludes 2026; 2026 interpreted as timestamp 2026-01-01 00:00:00
    # --- hide: start ---
-   print(pfl.loc['2024':'2025'])
+   print(pfl.slice['2024':'2026'])
    # --- hide: stop ---
 
 
+
 Volume-only, price-only or revenue-only
 =======================================
 

diff --git a/docs/tutorial/part3.ipynb b/docs/tutorial/part3.ipynb
diff --git a/docs/tutorial/part4.ipynb b/docs/tutorial/part4.ipynb
diff --git a/portfolyo/core/ndframelike.py b/portfolyo/core/ndframelike.py
@@ -42,6 +42,12 @@ def loc(self):
         a boolean array.)"""
         ...
 
+    @abc.abstractproperty
+    def slice(self):
+        """Create a new instance with a subset of the rows.
+
+        Differs from ``loc`` in that the selection is done with a right-open
+        interval, i.e., the end point of the slice is excluded from the result."""
+        ...
+
     @abc.abstractmethod
     def dataframe(
         self, cols: Iterable[str] = None, has_units: bool = True, *args, **kwargs

diff --git a/portfolyo/core/pfline/classes.py b/portfolyo/core/pfline/classes.py
@@ -286,6 +286,7 @@ class FlatPfLine(PfLine):
     hedge_with = prices.Flat.hedge_with
     # map_to_year => on child classes
     loc = flat_methods.loc
+    slice = flat_methods.slice
     __getitem__ = flat_methods.__getitem__
     # __bool__ => on child classes
     __eq__ = flat_methods.__eq__
@@ -300,6 +301,7 @@ class NestedPfLine(PfLine, children.ChildFunctionality):
     hedge_with = prices.Nested.hedge_with
     map_to_year = nested_methods.map_to_year
     loc = nested_methods.loc
+    slice = nested_methods.slice
     __bool__ = nested_methods.__bool__
     __eq__ = nested_methods.__eq__
 

diff --git a/portfolyo/core/pfline/flat_methods.py b/portfolyo/core/pfline/flat_methods.py
@@ -3,6 +3,8 @@
 from typing import TYPE_CHECKING, Any
 
 from ... import testing
+import pandas as pd
+from datetime import timedelta
 
 if TYPE_CHECKING:
     from .classes import FlatPfLine
@@ -31,6 +33,11 @@ def loc(self: FlatPfLine) -> LocIndexer:
     return LocIndexer(self)
 
 
+@property
+def slice(self: FlatPfLine) -> SliceIndexer:
+    """Indexer that selects a subset of the rows, excluding the end point.
+
+    Returns a ``SliceIndexer`` wrapping this instance; the actual selection is done
+    when the indexer is subscripted."""
+    # NOTE: this module-level name hides the builtin ``slice`` inside this module.
+    return SliceIndexer(self)
+
+
 class LocIndexer:
     """Helper class to obtain FlatPfLine instance, whose index is subset of original index."""
 
@@ -40,3 +47,21 @@ def __init__(self, pfl: FlatPfLine):
     def __getitem__(self, arg) -> FlatPfLine:
         newdf = self.pfl.df.loc[arg]
         return self.pfl.__class__(newdf)  # use same (leaf) class
+
+
+class SliceIndexer:
+    """Helper class to obtain FlatPfLine instance, whose index is subset of original index.
+
+    The selection is right-open: rows at or after the start point are kept, rows at
+    or after the end point are dropped. Either bound may be omitted (``None``)."""
+
+    def __init__(self, pfl: FlatPfLine):
+        self.pfl = pfl
+
+    def __getitem__(self, arg) -> FlatPfLine:
+        """Return a new instance holding the rows in the interval [arg.start, arg.stop).
+
+        ``arg`` is expected to be a slice; its bounds are converted with
+        ``pandas.to_datetime``. The step of the slice is not used."""
+        # NOTE(review): the bounds are parsed without looking at the timezone of the
+        # index -- confirm the behaviour for timezone-aware portfolio lines.
+        newdf = self.pfl.df
+        if arg.start is not None:
+            newdf = newdf.loc[newdf.index >= pd.to_datetime(arg.start)]
+        if arg.stop is not None:
+            # Compare strictly instead of shifting the end point by a fixed amount of
+            # time: a shifted bound selects the wrong rows whenever the end point does
+            # not fall on a whole second, which breaks the guarantee that
+            # ``slice[:a]`` and ``slice[a:]`` are complements.
+            newdf = newdf.loc[newdf.index < pd.to_datetime(arg.stop)]
+        return self.pfl.__class__(newdf)  # use same (leaf) class
diff --git a/portfolyo/core/pfline/nested_methods.py b/portfolyo/core/pfline/nested_methods.py
@@ -37,6 +37,11 @@ def loc(self: NestedPfLine) -> LocIndexer:
     return LocIndexer(self)
 
 
+@property
+def slice(self: NestedPfLine) -> SliceIndexer:
+    """Indexer that selects a subset of the rows, excluding the end point.
+
+    Returns a ``SliceIndexer`` wrapping this instance; the actual selection is done
+    when the indexer is subscripted."""
+    # NOTE: this module-level name hides the builtin ``slice`` inside this module.
+    return SliceIndexer(self)
+
+
 class LocIndexer:
     """Helper class to obtain NestedPfLine instance, whose index is subset of original index."""
 
@@ -46,3 +51,15 @@ def __init__(self, pfl: NestedPfLine):
     def __getitem__(self, arg) -> NestedPfLine:
         newchildren = {name: child.loc[arg] for name, child in self.pfl.items()}
         return self.pfl.__class__(newchildren)
+
+
+class SliceIndexer:
+    """Helper class to obtain NestedPfLine instance, whose index is subset of original index.
+
+    The end point of the slice is excluded; the slicing itself is delegated to the
+    ``.slice`` property of every child."""
+
+    def __init__(self, pfl: NestedPfLine):
+        self.pfl = pfl
+
+    def __getitem__(self, arg) -> NestedPfLine:
+        """Apply ``arg`` to each child and rebuild an instance of the same class."""
+        sliced = {}
+        for name, child in self.pfl.items():
+            sliced[name] = child.slice[arg]
+        return self.pfl.__class__(sliced)
diff --git a/portfolyo/core/pfstate/pfstate.py b/portfolyo/core/pfstate/pfstate.py
@@ -280,6 +280,10 @@ def __bool__(self):
     def loc(self) -> _LocIndexer:  # from ABC
         return _LocIndexer(self)
 
+    @property
+    def slice(self) -> _SliceIndexer:  # from ABC
+        """Indexer that selects a subset of the rows, excluding the end point.
+
+        Returns a ``_SliceIndexer`` wrapping this instance; the actual selection is
+        done when the indexer is subscripted."""
+        return _SliceIndexer(self)
+
 
 class _LocIndexer:
     """Helper class to obtain PfState instance, whose index is subset of original index."""
@@ -292,3 +296,17 @@ def __getitem__(self, arg) -> PfState:
         unsourcedprice = self.pfs.unsourcedprice.loc[arg]
         sourced = self.pfs.sourced.loc[arg]
         return PfState(offtakevolume, unsourcedprice, sourced)
+
+
+class _SliceIndexer:
+    """Helper class to obtain PfState instance, whose index is subset of original index.
+
+    The end point of the slice is excluded; the slicing itself is delegated to the
+    ``.slice`` property of the offtake volume, the unsourced prices and the sourced
+    volume."""
+
+    def __init__(self, pfs):
+        self.pfs = pfs
+
+    def __getitem__(self, arg) -> PfState:
+        """Apply ``arg`` to each of the three components and build a new PfState."""
+        pfs = self.pfs
+        return PfState(
+            pfs.offtake.volume.slice[arg],
+            pfs.unsourcedprice.slice[arg],
+            pfs.sourced.slice[arg],
+        )
diff --git a/tests/core/pfline/test_slice.py b/tests/core/pfline/test_slice.py
@@ -0,0 +1,91 @@
+"""Test if slice attributes works properly with portfolio line."""
+
+import pytest
+import pandas as pd
+from portfolyo import dev
+
+
+@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
+@pytest.mark.parametrize("slice_start", ["2021", "2022", "2022-01-02"])
+def test_flat_slice_start(slice_start, freq):
+    """With only a start point, ``.slice`` and ``.loc`` must give equal results."""
+    pfl = dev.get_flatpfline(
+        pd.date_range("2020", "2024", freq=freq, inclusive="left")
+    )
+    via_slice = pfl.slice[slice_start:]
+    via_loc = pfl.loc[slice_start:]
+    assert via_slice == via_loc
+
+
+@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
+@pytest.mark.parametrize(
+    "slice_end",
+    [
+        # (<param for slice>, <param for loc>)
+        ("2021", "2020"),
+        ("2022", "2021"),
+        ("2021-07", "2021-06"),
+        ("2022-01-02", "2022-01-01"),
+    ],
+)
+def test_flat_slice_end(slice_end, freq):
+    """With only an end point, ``.slice`` (end excluded) and ``.loc`` (end included)
+    must give equal results for the paired end points."""
+    stop_for_slice, stop_for_loc = slice_end
+    pfl = dev.get_flatpfline(
+        pd.date_range("2020", "2024", freq=freq, inclusive="left")
+    )
+    via_slice = pfl.slice[:stop_for_slice]
+    via_loc = pfl.loc[:stop_for_loc]
+    assert via_slice == via_loc
+
+
+@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
+@pytest.mark.parametrize(
+    "where",
+    ["2022", "2022-03", "2022-04-21", "2022-05-23 14:34"],
+)
+def test_flat_slice_whole(where: str, freq: str):
+    """Test that cutting a flat pfl at ``where`` gives 2 pieces that neither overlap
+    nor leave a gap (i.e., each original timestamp is in exactly one of the pieces.)
+    """
+    index = pd.date_range("2020", "2024", freq=freq, inclusive="left")
+    pfl = dev.get_flatpfline(index)
+    left = pfl.slice[:where]
+    right = pfl.slice[where:]
+    # No gap: together, the pieces hold every original timestamp.
+    combined = left.index.union(right.index)
+    pd.testing.assert_index_equal(combined, index)
+    # No overlap: the pieces do not share any timestamp.
+    shared = left.index.intersection(right.index)
+    assert len(shared) == 0
+
+
+@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
+@pytest.mark.parametrize("slice_start", ["2021", "2022", "2022-01-02"])
+def test_nested_slice_start(slice_start, freq):
+    """With only a start point, ``.slice`` and ``.loc`` must give equal results."""
+    pfl = dev.get_nestedpfline(
+        pd.date_range("2020", "2024", freq=freq, inclusive="left")
+    )
+    via_slice = pfl.slice[slice_start:]
+    via_loc = pfl.loc[slice_start:]
+    assert via_slice == via_loc
+
+
+@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
+@pytest.mark.parametrize(
+    "slice_end",
+    [
+        # (<param for slice>, <param for loc>)
+        ("2021", "2020"),
+        ("2022", "2021"),
+        ("2021-07", "2021-06"),
+        ("2022-01-02", "2022-01-01"),
+    ],
+)
+def test_nested_slice_end(slice_end, freq):
+    """With only an end point, ``.slice`` (end excluded) and ``.loc`` (end included)
+    must give equal results for the paired end points."""
+    stop_for_slice, stop_for_loc = slice_end
+    pfl = dev.get_nestedpfline(
+        pd.date_range("2020", "2024", freq=freq, inclusive="left")
+    )
+    via_slice = pfl.slice[:stop_for_slice]
+    via_loc = pfl.loc[:stop_for_loc]
+    assert via_slice == via_loc
+
+
+@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
+@pytest.mark.parametrize(
+    "where",
+    ["2022", "2022-03", "2022-04-21", "2022-05-23 14:34"],
+)
+def test_nested_slice_whole(where: str, freq: str):
+    """Test that cutting a nested pfl at ``where`` gives 2 pieces that neither overlap
+    nor leave a gap (i.e., each original timestamp is in exactly one of the pieces.)
+    """
+    index = pd.date_range("2020", "2024", freq=freq, inclusive="left")
+    pfl = dev.get_nestedpfline(index)
+    left = pfl.slice[:where]
+    right = pfl.slice[where:]
+    # No gap: together, the pieces hold every original timestamp.
+    combined = left.index.union(right.index)
+    pd.testing.assert_index_equal(combined, index)
+    # No overlap: the pieces do not share any timestamp.
+    shared = left.index.intersection(right.index)
+    assert len(shared) == 0
diff --git a/tests/core/pfstate/test_slice_state.py b/tests/core/pfstate/test_slice_state.py
@@ -0,0 +1,68 @@
+"""Test if slice attributes works properly with portfolio state."""
+
+import pytest
+import pandas as pd
+from portfolyo import dev
+
+
+@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
+@pytest.mark.parametrize("slice_start", ["2021", "2022", "2022-01-02"])
+@pytest.mark.parametrize(
+    "slice_end",
+    [
+        # (<param for slice>, <param for loc>)
+        ("2021", "2020"),
+        ("2022", "2021"),
+        ("2022-01-02", "2022-01-01"),
+    ],
+)
+def test_slice_state(slice_start, slice_end, freq):
+    """Head and tail pieces of a portfolio state must be equal whether they are
+    obtained with ``.slice`` or with ``.loc`` (using the paired end point)."""
+    stop_for_slice, stop_for_loc = slice_end
+    # NOTE(review): unlike the other slice tests, this index is created without
+    # inclusive="left", so the end point "2024" is kept -- confirm this is intended.
+    pfs = dev.get_pfstate(pd.date_range("2020", "2024", freq=freq))
+
+    pieces_via_slice = [pfs.slice[:stop_for_slice], pfs.slice[slice_start:]]
+    pieces_via_loc = [pfs.loc[:stop_for_loc], pfs.loc[slice_start:]]
+    assert pieces_via_slice == pieces_via_loc
+
+
+@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
+@pytest.mark.parametrize("slice_start", ["2021", "2022", "2022-01-02"])
+def test_state_slice_start(slice_start, freq):
+    """With only a start point, ``.slice`` and ``.loc`` must give equal results."""
+    pfs = dev.get_pfstate(
+        pd.date_range("2020", "2024", freq=freq, inclusive="left")
+    )
+    via_slice = pfs.slice[slice_start:]
+    via_loc = pfs.loc[slice_start:]
+    assert via_slice == via_loc
+
+
+@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
+@pytest.mark.parametrize(
+    "slice_end",
+    [
+        # (<param for slice>, <param for loc>)
+        ("2021", "2020"),
+        ("2022", "2021"),
+        ("2021-07", "2021-06"),
+        ("2022-01-02", "2022-01-01"),
+    ],
+)
+def test_state_slice_end(slice_end, freq):
+    """With only an end point, ``.slice`` (end excluded) and ``.loc`` (end included)
+    must give equal results for the paired end points."""
+    stop_for_slice, stop_for_loc = slice_end
+    pfs = dev.get_pfstate(
+        pd.date_range("2020", "2024", freq=freq, inclusive="left")
+    )
+    via_slice = pfs.slice[:stop_for_slice]
+    via_loc = pfs.loc[:stop_for_loc]
+    assert via_slice == via_loc
+
+
+@pytest.mark.parametrize("freq", ["MS", "AS", "QS", "D", "15T"])
+@pytest.mark.parametrize(
+    "where",
+    ["2022", "2022-03", "2022-04-21", "2022-05-23 14:34"],
+)
+def test_state_slice_whole(where: str, freq: str):
+    """Test that cutting a pfs at ``where`` gives 2 pieces that neither overlap
+    nor leave a gap (i.e., each original timestamp is in exactly one of the pieces.)
+    """
+    index = pd.date_range("2020", "2024", freq=freq, inclusive="left")
+    pfs = dev.get_pfstate(index)
+    left = pfs.slice[:where]
+    right = pfs.slice[where:]
+    # No gap: together, the pieces hold every original timestamp.
+    combined = left.index.union(right.index)
+    pd.testing.assert_index_equal(combined, index)
+    # No overlap: the pieces do not share any timestamp.
+    shared = left.index.intersection(right.index)
+    assert len(shared) == 0