merged with develop

rwijtvliet · Apr 16, 2024 · 7888d98 · 7888d98
2 parents 47e921a + c619cea
commit 7888d98
Show file tree

Hide file tree

Showing 38 changed files with 4,339 additions and 1,161 deletions.
diff --git a/.github/workflows/ci-on-pullreq.yaml b/.github/workflows/ci-on-pullreq.yaml
@@ -9,7 +9,7 @@ jobs:
       fail-fast: true
       matrix:
         os: ["ubuntu-latest", "macos-latest", "windows-latest"]
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12"]
 
     steps:
       - name: Checkout source

diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -5,16 +5,16 @@ build:
   tools:
     python: "3.10"
   jobs:
-    post_create_environment:
-      # Install poetry
-      # https://python-poetry.org/docs/#installing-manually
+
+    post_install:
       - pip install poetry
       # Tell poetry to not use a virtual environment
-      - poetry config virtualenvs.create false
-    post_install:
       # Install dependencies with 'docs' dependency group
       # https://python-poetry.org/docs/managing-dependencies/#dependency-groups
-      - poetry install --with docs
+      - poetry export -f requirements.txt --without-hashes --without-urls --with docs -o requirements.txt
+      - pip install -r requirements.txt
+      #- pip list
+
 
 sphinx:
   configuration: docs/conf.py

diff --git a/dev_scripts/checks.py b/dev_scripts/checks.py
@@ -0,0 +1,42 @@
+import pandas as pd
+import portfolyo as pf
+from portfolyo.core.shared import concat
+
+
+def get_idx(
+    startdate: str, starttime: str, tz: str, freq: str, enddate: str
+) -> pd.DatetimeIndex:
+    # Empty index.
+    if startdate is None:
+        return pd.DatetimeIndex([], freq=freq, tz=tz)
+    # Normal index.
+    ts_start = pd.Timestamp(f"{startdate} {starttime}", tz=tz)
+    ts_end = pd.Timestamp(f"{enddate} {starttime}", tz=tz)
+    return pd.date_range(ts_start, ts_end, freq=freq, inclusive="left")
+
+
+index = pd.date_range("2020", "2024", freq="QS", inclusive="left")
+# index2 = pd.date_range("2023", "2025", freq="QS", inclusive="left")
+# pfl = pf.dev.get_flatpfline(index)
+# pfl2 = pf.dev.get_flatpfline(index2)
+# print(pfl)
+# print(pfl2)
+
+# pfs = pf.dev.get_pfstate(index)
+
+# pfs2 = pf.dev.get_pfstate(index2)
+# pfl3 = concat.general(pfl, pfl2)
+# print(pfl3)
+
+# print(index)
+# print(index2)
+
+whole_pfl = pf.dev.get_nestedpfline(index)
+pfl_a = whole_pfl.slice[:"2021"]
+
+pfl_b = whole_pfl.slice["2021":"2022"]
+pfl_c = whole_pfl.slice["2022":]
+result = concat.concat_pflines(pfl_a, pfl_b, pfl_c)
+result2 = concat.concat_pflines(pfl_b, pfl_c, pfl_a)
+print(result)
+print(result2)
diff --git a/docs/core/pfline.rst b/docs/core/pfline.rst
@@ -270,6 +270,29 @@ Another slicing method is implemented with the ``.slice[]`` property. The improv
 
 
 
+Concatenation
+=============
+
+Portfolio lines can be concatenated with the ``portfolyo.concat()`` function. This only works if the input portfolio lines contain compatible information (the same frequency, timezone, start-of-day, kind, etc.) and, crucially, their indices are gapless and without overlap. To remove any overlap, use the ``.slice[]`` property.
+
+.. exec_code::
+
+   # --- hide: start ---
+   import portfolyo as pf, pandas as pd
+   index = pd.date_range('2024', freq='AS', periods=3)
+   input_df = pd.DataFrame({'w':[200, 220, 300], 'p': [100, 150, 200]}, index)
+   pfl = pf.PfLine(input_df)
+   # --- hide: stop ---
+   # continuation of previous code example
+   index2 = pd.date_range('2025', freq='AS', periods=3)  # 2 years' overlap with pfl
+   pfl2 = pf.PfLine(pd.DataFrame({'w':[22, 30, 40], 'p': [15, 20, 21]}, index))
+   # first two datapoints (until/excl 2026) from pfl, last two datapoints (from/incl 2026) from pfl2 
+   pf.concat([pfl.slice[:'2026'], pfl2.slice['2026':]]) 
+   # --- hide: start ---
+   print(pf.concat([pfl.slice[:'2026'], pfl2.slice['2026':]]))
+   # --- hide: stop ---
+
+
 Volume-only, price-only or revenue-only
 =======================================
 

diff --git a/docs/savefig/fig_hedge.png b/docs/savefig/fig_hedge.png
diff --git a/docs/savefig/fig_offtake.png b/docs/savefig/fig_offtake.png
diff --git a/docs/savefig/fig_plot_pfl.png b/docs/savefig/fig_plot_pfl.png
diff --git a/docs/savefig/fig_plot_pfs.png b/docs/savefig/fig_plot_pfs.png
diff --git a/poetry.lock b/poetry.lock
diff --git a/portfolyo/__init__.py b/portfolyo/__init__.py
@@ -3,9 +3,10 @@
 from . import _version, dev, testing, tools
 from .core import extendpandas  # extend functionalty of pandas
 from .core import suppresswarnings
-from .core.mixins.plot import plot_pfstates
 from .core.pfline import Kind, PfLine, Structure, create
 from .core.pfstate import PfState
+from .core.shared.concat import general as concat
+from .core.shared.plot import plot_pfstates
 from .prices.hedge import hedge
 from .prices.utils import is_peak_hour
 from .tools.changefreq import averagable as asfreq_avg
@@ -17,6 +18,8 @@
 from .tools.unit import Q_, Unit, ureg
 from .tools.wavg import general as wavg
 
+# from .core.shared.concat import general as concat
+
 VOLUME = Kind.VOLUME
 PRICE = Kind.PRICE
 REVENUE = Kind.REVENUE

diff --git a/portfolyo/core/pfline/classes.py b/portfolyo/core/pfline/classes.py
@@ -8,7 +8,7 @@
 import pandas as pd
 
 from ... import tools
-from ..mixins import ExcelClipboardOutput, PfLinePlot, PfLineText
+from ..shared import ExcelClipboardOutput, PfLinePlot, PfLineText
 from ..ndframelike import NDFrameLike
 from . import (
     create,

diff --git a/portfolyo/core/pfline/flat_methods.py b/portfolyo/core/pfline/flat_methods.py
@@ -2,6 +2,8 @@
 
 from typing import TYPE_CHECKING, Any
 
+from portfolyo import tools
+
 from ... import testing
 import pandas as pd
 
@@ -45,6 +47,13 @@ def __init__(self, pfl: FlatPfLine):
 
     def __getitem__(self, arg) -> FlatPfLine:
+        # Select rows from the underlying dataframe with ``.loc``.
         newdf = self.pfl.df.loc[arg]
+        try:
+            # Check the selection with the standardization assertion.
+            tools.standardize.assert_frame_standardized(newdf)
+        except AssertionError as e:
+            # Re-raise as ValueError, keeping the failed assertion as the cause.
+            raise ValueError(
+                "Timeseries not in expected form. See ``portfolyo.standardize()`` for more information."
+            ) from e
+
         return self.pfl.__class__(newdf)  # use same (leaf) class
 
 
@@ -63,4 +72,10 @@ def __getitem__(self, arg) -> FlatPfLine:
             mask &= self.pfl.index < arg.stop
 
         newdf = self.pfl.df.loc[mask]
+        try:
+            tools.standardize.assert_frame_standardized(newdf)
+        except AssertionError as e:
+            raise ValueError(
+                "Timeseries not in expected form. See ``portfolyo.standardize()`` for more information."
+            ) from e
         return self.pfl.__class__(newdf)  # use same (leaf) class
diff --git a/portfolyo/core/pfstate/pfstate.py b/portfolyo/core/pfstate/pfstate.py
@@ -12,7 +12,7 @@
 import pandas as pd
 
 from ... import tools
-from ..mixins import ExcelClipboardOutput, PfStatePlot, PfStateText
+from ..shared import ExcelClipboardOutput, PfStatePlot, PfStateText
 from ..ndframelike import NDFrameLike
 from ..pfline import PfLine, create
 from . import pfstate_helper

diff --git a/portfolyo/core/mixins/__init__.py → portfolyo/core/shared/__init__.py b/portfolyo/core/mixins/__init__.py → portfolyo/core/shared/__init__.py
diff --git a/portfolyo/core/shared/concat.py b/portfolyo/core/shared/concat.py
@@ -0,0 +1,149 @@
+# import pandas as pd
+# import portfolyo as pf
+from __future__ import annotations
+from typing import Iterable
+import pandas as pd
+from portfolyo import tools
+
+from ..pfstate import PfState
+from ..pfline.enums import Structure
+
+from ..pfline import PfLine, create
+from .. import pfstate
+
+
+def general(pfl_or_pfs: Iterable[PfLine | PfState]) -> None:
+    """
+    Based on passed parameters calls either concat_pflines() or concat_pfstates().
+
+    Parameters
+    ----------
+    pfl_or_pfs: Iterable[PfLine | PfState]
+        The input values. Can be either a list of Pflines or PfStates to concatenate.
+
+    Returns
+    -------
+    None
+
+    Notes
+    -----
+    Input portfolio lines must contain compatible information, i.e., same frequency,
+    timezone, start-of-day, and kind. Their indices must be gapless and without overlap.
+
+    For nested pflines, the number and names of their children must match; concatenation
+    is done on a name-by-name basis.
+
+    Concatenation returns the same result regardless of input order.
+
+    """
+    if all(isinstance(item, PfLine) for item in pfl_or_pfs):
+        return concat_pflines(pfl_or_pfs)
+    elif all(isinstance(item, PfState) for item in pfl_or_pfs):
+        return concat_pfstates(pfl_or_pfs)
+    else:
+        raise NotImplementedError(
+            "Concatenation is implemented only for PfState or PfLine."
+        )
+
+
+def concat_pflines(pfls: Iterable[PfLine]) -> PfLine:
+    """
+    Concatenate porfolyo lines along their index.
+
+    Parameters
+    ----------
+    pfls: Iterable[PfLine]
+        The input values.
+
+    Returns
+    -------
+    PfLine
+        Concatenated version of PfLines.
+
+    Notes
+    -----
+    Input portfolio lines must contain compatible information, i.e., same frequency,
+    timezone, start-of-day, and kind. Their indices must be gapless and without overlap.
+
+    For nested pflines, the number and names of their children must match; concatenation
+    is done on a name-by-name basis.
+
+    Concatenation returns the same result regardless of input order.
+    """
+    if len(pfls) < 2:
+        raise NotImplementedError(
+            "Cannot perform operation with less than 2 portfolio lines."
+        )
+    if len({pfl.kind for pfl in pfls}) != 1:
+        raise TypeError("Not possible to concatenate PfLines of different kinds.")
+    if len({pfl.index.freq for pfl in pfls}) != 1:
+        raise TypeError("Not possible to concatenate PfLines of different frequencies.")
+    if len({pfl.index.tz for pfl in pfls}) != 1:
+        raise TypeError("Not possible to concatenate PfLines of different time zones.")
+    if len({tools.startofday.get(pfl.index, "str") for pfl in pfls}) != 1:
+        raise TypeError(
+            "Not possible to concatenate PfLines of different start_of_day."
+        )
+    # we can concatenate only pflines of the same type: nested of flat
+    # with this test and check whether pfls are the same types and they have the same number of children
+    if len({pfl.structure for pfl in pfls}) != 1:
+        raise TypeError("Not possible to concatenate PfLines of different structures.")
+    if pfls[0].structure is Structure.NESTED:
+        child_names = pfls[0].children.keys()
+        for pfl in pfls:
+            diffs = set(child_names) ^ set(pfl.children.keys())
+            if len(diffs) != 0:
+                raise TypeError(
+                    "Not possible to concatenate PfLines with different children names."
+                )
+    # If we reach here, all pfls have same kind, same number and names of children.
+
+    # concat(a,b) and concat(b,a) should give the same result:
+    sorted_pfls = sorted(pfls, key=lambda pfl: pfl.index[0])
+    if pfls[0].structure is Structure.FLAT:
+        # create flat dataframe of parent
+        dataframes_flat = [pfl.df for pfl in sorted_pfls]
+        # concatenate dataframes into one
+        concat_data = pd.concat(dataframes_flat, axis=0)
+        try:
+            # Call create.flatpfline() and catch any ValueError
+            return create.flatpfline(concat_data)
+        except ValueError as e:
+            # Handle the error
+            raise ValueError(
+                "Error by creating PfLine. PfLine is either not gapless or has overlaps"
+            ) from e
+    child_data = {}
+    child_names = pfls[0].children.keys()
+    for cname in child_names:
+        # for every name in children need to concatenate elements
+        child_values = [pfl.children[cname] for pfl in sorted_pfls]
+        child_data[cname] = concat_pflines(child_values)
+
+    # create pfline from dataframes: ->
+    # call the constructor of pfl to check check gaplesnes and overplap
+    return create.nestedpfline(child_data)
+
+
+def concat_pfstates(pfss: Iterable[PfState]) -> PfState:
+    """
+    Concatenate porfolyo states along their index.
+
+    Parameters
+    ----------
+    pfss: Iterable[PfState]
+         The input values.
+
+    Returns
+    -------
+     PfState
+         Concatenated version of PfStates.
+
+    """
+    if len(pfss) < 2:
+        print("Concatenate needs at least two elements.")
+        return
+    offtakevolume = concat_pflines([pfs.offtakevolume for pfs in pfss])
+    sourced = concat_pflines([pfs.sourced for pfs in pfss])
+    unsourcedprice = concat_pflines([pfs.unsourcedprice for pfs in pfss])
+    return pfstate.PfState(offtakevolume, unsourcedprice, sourced)
diff --git a/portfolyo/core/mixins/excelclipboard.py → portfolyo/core/shared/excelclipboard.py b/portfolyo/core/mixins/excelclipboard.py → portfolyo/core/shared/excelclipboard.py
diff --git a/portfolyo/core/mixins/plot.py → portfolyo/core/shared/plot.py b/portfolyo/core/mixins/plot.py → portfolyo/core/shared/plot.py
diff --git a/portfolyo/core/mixins/text.py → portfolyo/core/shared/text.py b/portfolyo/core/mixins/text.py → portfolyo/core/shared/text.py
diff --git a/portfolyo/dev/develop.py b/portfolyo/dev/develop.py
@@ -3,7 +3,7 @@
 """
 
 import datetime as dt
-from typing import Dict, Union, Callable, Tuple
+from typing import Callable, Dict, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -28,30 +28,42 @@ def get_index(
     _seed: int = None,
 ) -> pd.DatetimeIndex:
     """Get index."""
+    # Prepare values.
     if _seed:
         np.random.seed(_seed)
     if not periods:
         standard_len = INDEX_LEN.get(freq, 10)
         periods = np.random.randint(standard_len // 2, standard_len * 2)
-        if tools.freq.up_or_down(freq, "H") <= 0 and tz is None:
-            # Shorten index to not include timestamp that do not exist in Europe/Berlin.
-            periods = min(periods, 4000)
     if not startdate:
-        a, m, d = 2020, 1, 1
-        a += np.random.randint(-4, 4) if _seed else (periods % 20 - 10)
+        a, m, d = 2016, 1, 1  # earliest possible
+        a += np.random.randint(0, 8) if _seed else (periods % 8)
         if tools.freq.up_or_down(freq, "MS") <= 0:
             m += np.random.randint(0, 12) if _seed else (periods % 12)
         if tools.freq.up_or_down(freq, "D") <= 0:
             d += np.random.randint(0, 28) if _seed else (periods % 28)
-        if tools.freq.up_or_down(freq, "H") <= 0 and tz is None:
-            # Start index after DST-start to not include timestamps that do not exist in Europe/Berlin.
-            m, d = 4, 2
         startdate = f"{a}-{m}-{d}"
     if not start_of_day:
         start_of_day = dt.time(hour=0, minute=0)
-    starttime = f"{start_of_day.hour:02}:{start_of_day.minute:02}:00"
-    start = f"{startdate} {starttime}"
-    return pd.date_range(start, freq=freq, periods=periods, tz=tz)
+    # Create index.
+    start = tools.stamp.create(startdate, tz, start_of_day)
+    i = pd.date_range(start, periods=periods, freq=freq)  # tz included in start
+    # Some checks.
+    if tools.freq.up_or_down(freq, "H") <= 0:
+        i = _shorten_index_if_necessary(i, start_of_day)
+    return i
+
+
+def _shorten_index_if_necessary(i, start_of_day) -> pd.DatetimeIndex:
+    """Shorten index with (quarter)hourly values if necessary to ensure that an integer
+    number of calendar days is included."""
+    if (i[-1] - i[0]).total_seconds() < 23 * 3600:
+        raise ValueError("Index must contain at least one full day")
+    # Must ensure that index is integer number of days.
+    for _ in range(0, 100):  # max 100 quarterhours in a day (@ end of DST)
+        if tools.right.stamp(i[-1], i.freq).time() == start_of_day:
+            return i
+        i = i[:-1]
+    raise ValueError("Can't find timestamp to end index on.")
 
 
 def get_value(