Skip to content

Commit

Permalink
fixed wavg error, added func avoid_frame_of_objects
Browse files Browse the repository at this point in the history
  • Loading branch information
Alina Voilova committed Jul 15, 2024
1 parent 2fe6f0f commit 53638a9
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 30 deletions.
9 changes: 5 additions & 4 deletions dev_scripts/test_new_ver.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from matplotlib import pyplot as plt
import pandas as pd
from portfolyo import Kind, dev

i = pd.date_range(
"2020-04-06", "2020-04-16", freq="MS", inclusive="left", tz="Europe/Berlin"
)
pfl = dev.get_flatpfline(i, Kind.COMPLETE)
index = pd.date_range("2020-01-01", "2021-01-01", freq="MS", tz=None)
pfl = dev.get_pfline(index, nlevels=2, childcount=1, kind=Kind.VOLUME)
pfl.plot(children=True)
plt.show()
22 changes: 8 additions & 14 deletions portfolyo/core/pfline/interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

import numpy as np
import pandas as pd
import pint_pandas

from ... import tools
from . import classes, create
Expand Down Expand Up @@ -313,27 +312,22 @@ def _from_data(
if data is None:
return InOp()

elif isinstance(data, int) or isinstance(data, float):
# TODO: if int, change to float?
elif isinstance(data, int):
return InOp(agn=float(data))

elif isinstance(data, float):
return InOp(agn=data)

elif isinstance(data, tools.unit.Q_):
return InOp(**{_unit2attr(data.units): data})

elif isinstance(data, pd.Series) and isinstance(data.index, pd.DatetimeIndex):
# timeseries
if hasattr(data, "pint"): # pint timeseries
if not isinstance(data.dtype, pint_pandas.PintType):
data = pd.Series([v.magnitude for v in data.values], data.index).astype(
f"pint[{data.values[0].units}]"
)
return InOp(**{_unit2attr(data.pint.units): data})
elif data.dtype == object: # timeeries of objects -> maybe Quantities?
if len(data) and isinstance(val := data.values[0], tools.unit.Q_):
# use unit of first value to find dimension
return InOp(**{_unit2attr(val.u): data})
else: # assume float or int
data = tools.unit.avoid_frame_of_objects(data)
if data.dtype in [float, int]:
return InOp(agn=data)
else:
return InOp(**{_unit2attr(data.pint.units): data})

elif (
isinstance(data, pd.DataFrame)
Expand Down
47 changes: 46 additions & 1 deletion portfolyo/tools/unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from pathlib import Path
from typing import Tuple, overload

from .types import Series_or_DataFrame
import pandas as pd
import pint
import pint_pandas
Expand Down Expand Up @@ -160,3 +160,48 @@ def split_magn_unit(
raise TypeError("For dataframes, handle the series seperately.")
else: # int, float, bool, timestamp, ...
return val, None


def avoid_frame_of_objects(fr: Series_or_DataFrame) -> Series_or_DataFrame:
    """Ensure a Series or DataFrame does not have objects as its values, if possible.

    Parameters
    ----------
    fr : Series_or_DataFrame
        The input data structure, which can be either a pandas Series or DataFrame.
        Expected: int-Series, float-Series, pint-Series, or Series of pint quantities
        (of equal dimensionality). A DataFrame is handled column by column.

    Returns
    -------
    Series_or_DataFrame
        The transformed data structure. Integer Series (of any width) are converted
        to float; float64 Series and pint-Series are returned as-is; a Series of
        pint quantities is converted to a pint-Series, using the unit of its first
        value.

    Raises
    ------
    ValueError
        If the quantities in a Series do not all share one dimensionality.
    TypeError
        If the Series is of none of the expected kinds.
    """
    if isinstance(fr, pd.DataFrame):
        return pd.DataFrame({col: avoid_frame_of_objects(s) for col, s in fr.items()})

    # fr is now a Series.

    # Test for "any integer dtype" instead of ``fr.dtype == int``: the latter only
    # matches the platform's default integer width, so e.g. int32 data (or int64
    # data where the default integer is 32 bits wide) would not be converted.
    if pd.api.types.is_integer_dtype(fr.dtype):
        return fr.astype(float)
    if fr.dtype == float:
        return fr
    if hasattr(fr, "pint"):
        if isinstance(fr.dtype, pint_pandas.PintType):
            return fr
        # We may have a series of pint quantities. Convert to pint-series, if possible.
        dimensions = {v.dimensionality for v in fr.values}
        if len(dimensions) != 1:
            raise ValueError(
                f"Expected a Series with quantities of the same dimension; got {dimensions}."
            )
        # Convert all values to same unit.
        units = fr.values[0].units
        magnitudes = [v.to(units).magnitude for v in fr.values]
        return pd.Series(magnitudes, fr.index, dtype=f"pint[{units}]")
    raise TypeError(
        "Expected int-Series, float-Series, pint-Series, or Series of pint quantities (of equal dimensionality)."
    )
9 changes: 8 additions & 1 deletion portfolyo/tools/visualize/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,14 @@ def plot_timeseries_as_hline(
s = prepare_ax_and_s(ax, s) # ensure unit compatibility (if possible)
categories = Categories(s)
# Center around x-tick:
ax.hlines(categories.y(), categories.x() - 0.4, categories.x() + 0.4, **kwargs)
ax.hlines(
pd.Series(
categories.y(), categories.x()
), # HACK: categories.y() no longer working after update of pint-pandas to 0.6
categories.x() - 0.4,
categories.x() + 0.4,
**kwargs,
)
ax.set_xticks(categories.x(MAX_XLABELS), categories.labels(MAX_XLABELS))
set_data_labels(ax, categories.x(), categories.y(), labelfmt, True)
ax.autoscale()
Expand Down
16 changes: 10 additions & 6 deletions portfolyo/tools/wavg.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,13 +438,17 @@ def rowvalue_uniformity(df: pd.DataFrame) -> pd.Series:


def weights_as_series(weights: Iterable | Mapping, refindex: Iterable) -> pd.Series:
# Step 1: turn into Series.
if isinstance(weights, pd.Series):
return weights
if isinstance(weights, Mapping):
return pd.Series(weights)
if isinstance(weights, Iterable):
return pd.Series(weights, refindex)
raise TypeError("``weights`` must be iterable or mapping.")
pass
elif isinstance(weights, Mapping):
weights = pd.Series(weights)
elif isinstance(weights, Iterable):
weights = pd.Series(weights, refindex)
else:
raise TypeError("``weights`` must be iterable or mapping.")
# Step 2: avoid Series of Quantity-objects (convert to pint-series instead).
return tools_unit.avoid_frame_of_objects(weights)


def values_areuniform(series: pd.Series, mask: Iterable = None) -> bool:
Expand Down
5 changes: 2 additions & 3 deletions tests/tools/test_wavg.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,8 @@ def do_test_dataframe(values: pd.DataFrame, weights: Any, expected: Any, **kwarg
with pytest.raises(expected):
tools.wavg.dataframe(values, weights, **kwargs)
return
pf.testing.assert_series_equal(
tools.wavg.dataframe(values, weights, **kwargs), expected
)
result = tools.wavg.dataframe(values, weights, **kwargs)
pf.testing.assert_series_equal(result, expected)


@pytest.mark.parametrize("weightsas", ["none", "list", "dict", "series"])
Expand Down
2 changes: 1 addition & 1 deletion tests/tools/visualize/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_pfline_plot(
index = pd.date_range("2020-01-01", "2021-01-01", freq=freq, tz=None)
pfl = pf.dev.get_pfline(index, nlevels=levels, childcount=childcount, kind=kind)
pfl.plot(children=children)
plt.show()
# plt.show()


@pytest.mark.parametrize("childcount", [1, 2, 3])
Expand Down

0 comments on commit 53638a9

Please sign in to comment.