Merge branch 'develop' into intersect

rwijtvliet · Apr 17, 2024 · 925d81a · 925d81a
2 parents b00abac + d26f3e9
commit 925d81a
Show file tree

Hide file tree

Showing 11 changed files with 584 additions and 121 deletions.
diff --git a/.gitignore b/.gitignore
@@ -49,6 +49,7 @@ coverage.xml
 *.py,cover
 .hypothesis/
 .pytest_cache/
+test-output.xml
 cover/
 
 # Translations
@@ -160,3 +161,4 @@ cython_debug/
 .issues/
 .DS_Store
 test.xlsx
+
diff --git a/docs/core/toplevel.rst b/docs/core/toplevel.rst
@@ -0,0 +1,30 @@
+.. |_| unicode:: 0xA0 
+   :trim:
+
+===============
+Top-level tools
+===============
+
+Some tools for working with ``pandas.Series``, ``pandas.DataFrame``, ``portfolyo.PfLine`` and ``portfolyo.PfState`` objects are available at the root of the package. They are concisely listed below.
+
+----------------------
+Work on pandas objects 
+----------------------
+
+* ``portfolyo.asfreq_avg()`` Changes the frequency of a Series or DataFrame with "averagable" data. See :doc:`this page<../specialized_topics/resampling>` for more information.
+
+* ``portfolyo.asfreq_sum()`` Changes the frequency of a Series or DataFrame with "summable" data. See :doc:`this page<../specialized_topics/resampling>` for more information. 
+
+* ``portfolyo.wavg()`` Calculates the weighted average of a Series or DataFrame.
+
+* ``portfolyo.standardize()`` Ensures (or asserts) that a Series or DataFrame follows the rules necessary to initialize a PfLine with it.
+
+-------------------------
+Work on portfolyo objects
+-------------------------
+
+* ``portfolyo.concat()`` Concatenates PfLines into one PfLine.
+
+* ``portfolyo.plot_pfstates()`` Plots several PfStates in one figure.
+
+
diff --git a/docs/index.rst b/docs/index.rst
@@ -68,6 +68,7 @@ Contents
    core/pfline
    core/pfstate
    core/interoperability
+   core/toplevel
 
 .. toctree::
    :maxdepth: 1
@@ -94,4 +95,4 @@ Contents
    :maxdepth: 2
    :caption: Full reference
 
-   full_reference
+   full_reference
diff --git a/docs/specialized_topics/resampling.rst b/docs/specialized_topics/resampling.rst
@@ -137,4 +137,4 @@ The reason for the higher price in the previous example, is that, there, the pri
 Resampling with ``portfolyo``
 -----------------------------
 
-When changing the frequency of a ``PfLine`` or ``PfState`` object, the considerations above are automatically taken into account. If you are in the situation of having to change the frequency of a ``pandas.Series`` or ``DataFrame`` with a ``DatetimeIndex``, however, the relevant functions are also available at the ``portfolyo.tools.changefreq`` module.
+When changing the frequency of a ``PfLine`` or ``PfState`` object with the ``.asfreq()`` method, the considerations above are automatically taken into account. If you need to change the frequency of a ``pandas.Series`` or ``pandas.DataFrame`` with a ``DatetimeIndex``, however, the relevant functions are also available at the package root, as the ``portfolyo.asfreq_avg()`` and ``portfolyo.asfreq_sum()`` functions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/portfolyo/__init__.py b/portfolyo/__init__.py
@@ -1,18 +1,22 @@
 """Package to analyse and manipulate timeseries related to power and gas offtake portfolios."""
 
-from . import _version, dev, tools
+from . import _version, dev, testing, tools
 from .core import extendpandas  # extend functionalty of pandas
 from .core import suppresswarnings
 from .utilities.plot import plot_pfstates
 from .core.pfline import Kind, PfLine, Structure, create
 from .core.pfstate import PfState
+from .core.shared.concat import general as concat
+from .core.shared.plot import plot_pfstates
 from .prices.hedge import hedge
 from .prices.utils import is_peak_hour
+from .tools.changefreq import averagable as asfreq_avg
+from .tools.changefreq import summable as asfreq_sum
 from .tools.changeyear import characterize_index, map_frame_to_year
 from .tools.freq import FREQUENCIES
 from .tools.standardize import frame as standardize
 from .tools.tzone import force_agnostic, force_aware
-from .tools.unit import Q_, ureg, Unit
+from .tools.unit import Q_, Unit, ureg
 from .tools.wavg import general as wavg
 from .utilities.concat import general as concat
 from .utilities.intersect import indexable as intersection
@@ -28,4 +32,4 @@
 suppresswarnings.apply()
 
 __version__ = _version.get_versions()["version"]
-__all__ = ["tools", "dev", "PfLine", "PfState"]
+__all__ = ["tools", "dev", "testing", "PfLine", "PfState"]
diff --git a/portfolyo/tools2/concat.py b/portfolyo/tools2/concat.py
@@ -0,0 +1,149 @@
+# import pandas as pd
+# import portfolyo as pf
+from __future__ import annotations
+from typing import Iterable
+import pandas as pd
+from portfolyo import tools
+
+from ..core.pfstate import PfState
+from ..core.pfline.enums import Structure
+
+from ..core.pfline import PfLine, create
+from ..core import pfstate
+
+
+def general(pfl_or_pfs: Iterable[PfLine | PfState]) -> None:
+    """
+    Based on passed parameters calls either concat_pflines() or concat_pfstates().
+
+    Parameters
+    ----------
+    pfl_or_pfs: Iterable[PfLine | PfState]
+        The input values. Can be either a list of Pflines or PfStates to concatenate.
+
+    Returns
+    -------
+    None
+
+    Notes
+    -----
+    Input portfolio lines must contain compatible information, i.e., same frequency,
+    timezone, start-of-day, and kind. Their indices must be gapless and without overlap.
+
+    For nested pflines, the number and names of their children must match; concatenation
+    is done on a name-by-name basis.
+
+    Concatenation returns the same result regardless of input order.
+
+    """
+    if all(isinstance(item, PfLine) for item in pfl_or_pfs):
+        return concat_pflines(pfl_or_pfs)
+    elif all(isinstance(item, PfState) for item in pfl_or_pfs):
+        return concat_pfstates(pfl_or_pfs)
+    else:
+        raise NotImplementedError(
+            "Concatenation is implemented only for PfState or PfLine."
+        )
+
+
+def concat_pflines(pfls: Iterable[PfLine]) -> PfLine:
+    """
+    Concatenate porfolyo lines along their index.
+
+    Parameters
+    ----------
+    pfls: Iterable[PfLine]
+        The input values.
+
+    Returns
+    -------
+    PfLine
+        Concatenated version of PfLines.
+
+    Notes
+    -----
+    Input portfolio lines must contain compatible information, i.e., same frequency,
+    timezone, start-of-day, and kind. Their indices must be gapless and without overlap.
+
+    For nested pflines, the number and names of their children must match; concatenation
+    is done on a name-by-name basis.
+
+    Concatenation returns the same result regardless of input order.
+    """
+    if len(pfls) < 2:
+        raise NotImplementedError(
+            "Cannot perform operation with less than 2 portfolio lines."
+        )
+    if len({pfl.kind for pfl in pfls}) != 1:
+        raise TypeError("Not possible to concatenate PfLines of different kinds.")
+    if len({pfl.index.freq for pfl in pfls}) != 1:
+        raise TypeError("Not possible to concatenate PfLines of different frequencies.")
+    if len({pfl.index.tz for pfl in pfls}) != 1:
+        raise TypeError("Not possible to concatenate PfLines of different time zones.")
+    if len({tools.startofday.get(pfl.index, "str") for pfl in pfls}) != 1:
+        raise TypeError(
+            "Not possible to concatenate PfLines of different start_of_day."
+        )
+    # we can concatenate only pflines of the same type: nested of flat
+    # with this test and check whether pfls are the same types and they have the same number of children
+    if len({pfl.structure for pfl in pfls}) != 1:
+        raise TypeError("Not possible to concatenate PfLines of different structures.")
+    if pfls[0].structure is Structure.NESTED:
+        child_names = pfls[0].children.keys()
+        for pfl in pfls:
+            diffs = set(child_names) ^ set(pfl.children.keys())
+            if len(diffs) != 0:
+                raise TypeError(
+                    "Not possible to concatenate PfLines with different children names."
+                )
+    # If we reach here, all pfls have same kind, same number and names of children.
+
+    # concat(a,b) and concat(b,a) should give the same result:
+    sorted_pfls = sorted(pfls, key=lambda pfl: pfl.index[0])
+    if pfls[0].structure is Structure.FLAT:
+        # create flat dataframe of parent
+        dataframes_flat = [pfl.df for pfl in sorted_pfls]
+        # concatenate dataframes into one
+        concat_data = pd.concat(dataframes_flat, axis=0)
+        try:
+            # Call create.flatpfline() and catch any ValueError
+            return create.flatpfline(concat_data)
+        except ValueError as e:
+            # Handle the error
+            raise ValueError(
+                "Error by creating PfLine. PfLine is either not gapless or has overlaps"
+            ) from e
+    child_data = {}
+    child_names = pfls[0].children.keys()
+    for cname in child_names:
+        # for every name in children need to concatenate elements
+        child_values = [pfl.children[cname] for pfl in sorted_pfls]
+        child_data[cname] = concat_pflines(child_values)
+
+    # create pfline from dataframes: ->
+    # call the constructor of pfl to check check gaplesnes and overplap
+    return create.nestedpfline(child_data)
+
+
+def concat_pfstates(pfss: Iterable[PfState]) -> PfState:
+    """
+    Concatenate porfolyo states along their index.
+
+    Parameters
+    ----------
+    pfss: Iterable[PfState]
+         The input values.
+
+    Returns
+    -------
+     PfState
+         Concatenated version of PfStates.
+
+    """
+    if len(pfss) < 2:
+        print("Concatenate needs at least two elements.")
+        return
+    offtakevolume = concat_pflines([pfs.offtakevolume for pfs in pfss])
+    sourced = concat_pflines([pfs.sourced for pfs in pfss])
+    unsourcedprice = concat_pflines([pfs.unsourcedprice for pfs in pfss])
+    return pfstate.PfState(offtakevolume, unsourcedprice, sourced)
diff --git a/portfolyo/tools2/intersect.py b/portfolyo/tools2/intersect.py
@@ -0,0 +1,47 @@
+from portfolyo.tools.intersect import indices_flex
+from ..core.pfline import PfLine
+from ..core.pfstate import PfState
+from typing import List, Union
+
+import pandas as pd
+
+
+def indexable(
+    *frames: Union[pd.Series, pd.DataFrame, PfLine, PfState],
+    ignore_freq: bool = False,
+    ignore_tz: bool = False,
+    ignore_start_of_day: bool = False,
+) -> List[Union[pd.Series, pd.DataFrame, PfLine, PfState]]:
+    """Intersect several dataframes and/or series.
+
+    Parameters
+    ----------
+    *frames : pd.Series and/or pd.DataFrame and/or PfLines and/or PfStates
+        The frames to intersect.
+    ignore_freq: bool, optional (default: False)
+        If True, do the intersection even if the frequencies do not match; drop the
+        time periods that do not (fully) exist in either of the frames.
+    ignore_tz: bool, optional (default: False)
+        If True, ignore the timezones; perform the intersection using 'wall time'.
+    ignore_start_of_day: bool, optional (default: False)
+        If True, perform the intersection even if the frames have a different start-of-day.
+        The start-of-day of the original frames is preserved, even if the frequency is shorter
+        than daily.
+
+    Returns
+    -------
+    list of series and/or dataframes
+        As input, but trimmed to their intersection.
+
+    Notes
+    -----
+    The indices must have equal frequency, timezone, start-of-day. Otherwise, an error
+    is raised. If there is no overlap, empty frames are returned.
+    """
+    new_idxs = indices_flex(
+        *[fr.index for fr in frames],
+        ignore_freq=ignore_freq,
+        ignore_tz=ignore_tz,
+        ignore_start_of_day=ignore_start_of_day,
+    )
+    return [fr.loc[idx] for idx, fr in zip(new_idxs, frames)]