From c0181d2afb4256af2253e7e8aabddcd00605a660 Mon Sep 17 00:00:00 2001 From: rwijtvliet Date: Mon, 11 Nov 2024 01:29:28 +0100 Subject: [PATCH] normalize units + tests. Simple interop --- poetry.lock | 24 +- portfolyo/__init__.py | 2 +- portfolyo/core/pfline/flat_helper.py | 3 +- portfolyo/core/pfline/flat_methods.py | 2 +- portfolyo/core/pfline/interop.py | 198 ++++++++--------- portfolyo/core/pfline/nested_methods.py | 2 +- portfolyo/core/pfline/text.py | 37 +++- portfolyo/core/pfstate/text.py | 7 +- portfolyo/core/shared/text.py | 37 +++- portfolyo/tools/characterize.py | 71 ++++++ portfolyo/tools/freq.py | 23 +- portfolyo/tools/standardize.py | 26 +-- portfolyo/tools/testing.py | 70 ++++-- portfolyo/tools/unit.py | 137 +++++++----- portfolyo/tools/unitdefinitions.txt | 7 - portfolyo/tools/wavg.py | 10 +- pyproject.toml | 3 +- tests/core/pfline/test_flat_helper.py | 6 +- tests/core/pfline/test_nested_children.py | 4 +- ...est_pfline_arithmatic_numeric_and_error.py | 2 +- tests/core/pfline/test_pfline_init.py | 2 +- tests/core/pfstate/test_pfstate_arithmatic.py | 2 +- tests/tools/test_changefreq.py | 2 +- tests/tools/test_frames.py | 2 +- tests/tools/test_intersect.py | 2 +- tests/tools/test_intersect_flex_frame.py | 16 +- tests/tools/test_peakconvert.py | 6 +- tests/tools/test_trim.py | 2 +- tests/tools/test_tzone.py | 2 +- tests/tools/test_unit.py | 205 ++++++++++++++++++ 30 files changed, 637 insertions(+), 275 deletions(-) create mode 100644 portfolyo/tools/characterize.py diff --git a/poetry.lock b/poetry.lock index 2004fc1..8ed9b2e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1442,6 +1442,26 @@ files = [ [package.dependencies] traitlets = "*" +[[package]] +name = "mip" +version = "1.16rc0" +description = "Python tools for Modeling and Solving Mixed-Integer Linear Programs (MIPs)" +optional = false +python-versions = "<3.13,>=3.7" +files = [ + {file = "mip-1.16rc0-py3-none-any.whl", hash = "sha256:62e04c27bce56da94d070f6b03d9e342600d10f4016984334af7dae8c0f9fd09"}, + {file = "mip-1.16rc0.tar.gz", hash = "sha256:f250083a41f4e1f61b6885041d64ed2a88bff5ead7165c329b5b31d07c517466"}, +] + +[package.dependencies] +cffi = ">=1.15" + +[package.extras] +gurobi = ["gurobipy (>=8)"] +highs = ["highspy (>=1.5.3)"] +numpy = ["numpy (==1.21.*)", "numpy (==1.24.*)", "numpy (>=1.25)"] +test = ["matplotlib (==3.5.3)", "matplotlib (==3.6.2)", "matplotlib (>=3.7)", "networkx (==2.6.3)", "networkx (==2.8.8)", "pytest (>=7.4)"] + [[package]] name = "mistune" version = "3.0.2" @@ -3113,5 +3133,5 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" -python-versions = "^3.10" -content-hash = "91a127d7701e3b613b177c092f7336596c37f0ae482fe94c1ba11a3a09be208f" +python-versions = "^3.10,<3.13" +content-hash = "52f67b9ae8d57032f4d7a23ed97d9b3d0fcc876d287b61bd3edc2ddb8f8888e4" diff --git a/portfolyo/__init__.py b/portfolyo/__init__.py index 0c94ed2..2b32e95 100644 --- a/portfolyo/__init__.py +++ b/portfolyo/__init__.py @@ -19,7 +19,7 @@ from .tools.standardize import frame as standardize from .tools.tzone import force_agnostic, force_aware from .tools.unit import Q_, Unit, ureg -from .tools.unit import avoid_frame_of_objects as pintframe +from .tools.unit import normalize_frame as pintframe from .tools.wavg import general as wavg import tomli diff --git a/portfolyo/core/pfline/flat_helper.py b/portfolyo/core/pfline/flat_helper.py index c24b242..f2c9842 100644 --- a/portfolyo/core/pfline/flat_helper.py +++ b/portfolyo/core/pfline/flat_helper.py @@ -32,8 +32,7 @@ def _dataframe(data: Any) -> pd.DataFrame: # Check data types. if inop.nodim is not None: raise ValueError( - f"Found explicitly dimensionless ({inop.nodim}) data. Use 'w', 'q', 'p', 'r' (e.g. as dictionary" - " keys), or explicitly pass values with a ``pint`` unit, to indicate dimensionality." + f"Found explicitly dimensionless ({inop.nodim}) data. Add a ``pint`` unit to indicate dimensionality." ) # Make actual dataframe. diff --git a/portfolyo/core/pfline/flat_methods.py b/portfolyo/core/pfline/flat_methods.py index f74eb35..171446f 100644 --- a/portfolyo/core/pfline/flat_methods.py +++ b/portfolyo/core/pfline/flat_methods.py @@ -72,7 +72,7 @@ def __eq__(self: FlatPfLine, other: Any) -> bool: if not isinstance(other, self.__class__): return False try: - tools.testing.assert_frame_equal(self.df, other.df, rtol=1e-7) + tools.testing.assert_dataframe_equal(self.df, other.df, rtol=1e-7) return True except AssertionError: return False diff --git a/portfolyo/core/pfline/interop.py b/portfolyo/core/pfline/interop.py index 7ecb303..ffd3075 100644 --- a/portfolyo/core/pfline/interop.py +++ b/portfolyo/core/pfline/interop.py @@ -8,7 +8,8 @@ import numpy as np import pandas as pd -from pint import DimensionalityError +import pint +import pint_pandas from ... import tools from . import classes, create @@ -35,9 +36,6 @@ class InOp: . Turn all into timeseries: inop = inop.to_timeseries() - . Assign unit-agnostic to unit-specific: - inop = inop.assign_agn('p') - . Check for consistency and fill the missing attributes as much as possible: inop = inop.make_consistent() @@ -45,19 +43,18 @@ class InOp: inop = inop.to_df() """ - w: tools.unit.Q_ | pd.Series = None - q: tools.unit.Q_ | pd.Series = None - p: tools.unit.Q_ | pd.Series = None - r: tools.unit.Q_ | pd.Series = None - nodim: tools.unit.Q_ | pd.Series = None # explicitly dimensionless + w: pint.Quantity | pd.Series | None = None + q: pint.Quantity | pd.Series | None = None + p: pint.Quantity | pd.Series | None = None + r: pint.Quantity | pd.Series | None = None + nodim: float | pd.Series | None = None # explicitly dimensionless def __post_init__(self): - # Add correct units and check type. - object.__setattr__(self, "w", _check_unit(self.w, "w")) - object.__setattr__(self, "q", _check_unit(self.q, "q")) - object.__setattr__(self, "p", _check_unit(self.p, "p")) - object.__setattr__(self, "r", _check_unit(self.r, "r")) - object.__setattr__(self, "nodim", _check_unit(self.nodim, "nodim")) + object.__setattr__(self, "w", check_dimensionality(self.w, "w")) + object.__setattr__(self, "q", check_dimensionality(self.q, "q")) + object.__setattr__(self, "p", check_dimensionality(self.p, "p")) + object.__setattr__(self, "r", check_dimensionality(self.r, "r")) + object.__setattr__(self, "nodim", check_dimensionality(self.nodim, "nodim")) @classmethod def from_data(cls, data): @@ -66,7 +63,7 @@ def from_data(cls, data): def to_timeseries(self, ref_index=None) -> InOp: """Turn all values into timeseries or None. If none of the attributes is a timeseries, and no ``ref_index`` is provided, raise Error. If >1 is a timeseries, - store only the timestamps where they overlap (i.e., intersection).""" + keep only the timestamps where they overlap (i.e., intersection).""" # Get index. indices = [] if ref_index is None else [ref_index] for attr in _ATTRIBUTES: @@ -81,44 +78,44 @@ def to_timeseries(self, ref_index=None) -> InOp: val = getattr(self, attr) if val is None: continue - elif isinstance(val, pd.Series): - kwargs[attr] = val.loc[index] - elif isinstance(val, tools.unit.Q_): - kwargs[attr] = pd.Series(val.m, index, dtype=f"pint[{val.units:P}]") - else: # float + elif isinstance(val, float): kwargs[attr] = pd.Series(val, index) + elif isinstance(val, pint.Quantity): + kwargs[attr] = pd.Series(val.magnitude, index).astype( + f"pint[{val.units}]" + ) + else: # float- or pint-series + kwargs[attr] = val.loc[index] # Return as new InOp instance. return InOp(**kwargs) - def drop(self, da: str) -> InOp: - """Drop part of the information and return new InOp object.""" - return InOp(**{attr: getattr(self, attr) for attr in _ATTRIBUTES if attr != da}) + # def drop(self, da: str) -> InOp: + # """Drop part of the information and return new InOp object.""" + # return InOp(**{attr: getattr(self, attr) for attr in _ATTRIBUTES if attr != da}) def make_consistent(self) -> InOp: - """Fill as much of the data as possible. All data must be None or timeseries, and - self.agn must have been assigned (or dropped).""" + """Fill as much of the data as possible. All data must be None or timeseries.""" self._assert_all_timeseries() - # If we land here, there is no self.agn, and all other attributes are timeseries (or None). + # If we land here, all attributes are timeseries (or None). w, q, p, r, nodim = self.w, self.q, self.p, self.r, self.nodim + duration = tools.duration.frame # Volumes. if w is not None and q is not None: try: - tools.testing.assert_series_equal( - w, q / q.index.duration, check_names=False - ) + tools.testing.assert_series_equal(w, q / duration(q), check_names=False) except AssertionError as e: raise ValueError("Values for w and q are not consistent.") from e elif w is not None and q is None: - q = w * w.index.duration + q = w * duration(w) elif w is None and q is not None: - w = q / q.index.duration + w = q / duration(q) elif w is None and q is None and p is not None and r is not None: q = r / p - w = q / q.index.duration + w = q / duration(q) # If we land here, there are no more options to find w and q. # They are consistent with each other but might be inconsistent with p and r. @@ -160,37 +157,24 @@ def make_consistent(self) -> InOp: def to_df(self) -> pd.DataFrame: """Create dataframe with (at most) columns w, q, p, r, nodim. All data must be - None or timeseries, and self.agn must have been assigned (or dropped). Also, you'll - probably want to have run the `.make_consistent()` method.""" + None or timeseries. Also, you'll probably want to have run the `.make_consistent()` method. + """ self._assert_all_timeseries() # If we land here, all attributes are timeseries (or None). - series = {} - for attr in _ATTRIBUTES: - val = getattr(self, attr) - if val is None: - continue - series[attr] = val - - return pd.DataFrame(series) + return pd.DataFrame( + {attr: s for attr in _ATTRIBUTES if (s := getattr(self, attr)) is not None} + ) def _assert_all_timeseries(self): - """Raise Error if object (still) has agnostic data or if not all data are timeseries.""" - - # Guard clause. - errors = {} + """Raise Error if not all data are timeseries.""" for attr in _ATTRIBUTES: - val = getattr(self, attr) - if val is None: - continue - if isinstance(val, pd.Series): + if isinstance(getattr(self, attr), None | pd.Series): continue - errors[attr] = type(val) - if errors: raise ValueError( - "Object contains non-timeseries data; first use `.to_timeseries()`." + f"Attribute {attr} contains non-timeseries data; use `.to_timeseries()`." ) def __bool__(self) -> bool: @@ -205,71 +189,65 @@ def __eq__(self, other) -> bool: return _equal(self, other) -def _check_unit( - v: float | int | tools.unit.Q_ | pd.Series, attr: str -) -> float | tools.unit.Q | pd.Series: - """Check the unit and dimensionality of a value. +def raisedimerror_receivedfloat(expected: pint.util.UnitsContainer) -> None: + raise pint.DimensionalityError( + expected, + tools.unit.NAMES_AND_DIMENSIONS["nodim"], + extra_msg="Float or int only allowed for dimensionless value. To specify a physical quantity, add a unit.", + ) + + +def raisedimerror_receivedincorrect( + expected: pint.util.UnitsContainer, received: pint.util.UnitsContainer +) -> None: + raise pint.DimensionalityError( + expected, + received, + extra_msg=f"Incorrect dimension for this attribute; expected {expected}, got {received}.", + ) + + +def check_dimensionality( + v: None | float | int | pint.Quantity | pd.Series, attr: str +) -> None | float | pint.Quantity | pd.Series: + """Check the dimensionality of a value. This function verifies if the given value `v` has the correct unit dimensionality corresponding to the attribute `attr`. - """ # Retrieve the expected dimensionality for the given attribute. - dim = tools.unit.NAMES_AND_DIMENSIONS[attr] if v is None: return v + expected_dim = tools.unit.NAMES_AND_DIMENSIONS[attr] + v = tools.unit.normalize(v) + # Check if the value is a float or int and ensure it is dimensionless. - if isinstance(v, float | int): - if dim is not tools.unit.NAMES_AND_DIMENSIONS["nodim"]: - raise DimensionalityError( - dim, - tools.unit.NAMES_AND_DIMENSIONS["nodim"], - extra_msg="Float or int only allowed for dimensionless value. To specify a physical quantity, add a unit.", - ) - else: - return float(v) - - elif isinstance(v, tools.unit.Q_): - if not v.dimensionality == dim: - raise DimensionalityError( - dim, - v.pint.dimensionality, - extra_msg=f"Incorrect dimension for this attribute; expected {dim}, got {v.pint.dimensionality}", - ) - # if the dim is nodim, we retun float - elif v.dimensionality == tools.unit.NAMES_AND_DIMENSIONS["nodim"]: - return float(v) - # else - else: - return v - - elif isinstance(v, pd.Series) and isinstance(v.index, pd.DatetimeIndex): - # Turn into floats-series or pint-series. - v = tools.unit.avoid_frame_of_objects(v) - if pd.api.types.is_float_dtype(v): - if dim is not tools.unit.NAMES_AND_DIMENSIONS["nodim"]: - raise DimensionalityError( - dim, - tools.unit.NAMES_AND_DIMENSIONS["nodim"], - extra_msg=f". Float or int only allowed for dimensionless value, got {dim}. To specify a physical quantity, add a unit.", - ) - else: - return v + if isinstance(v, float): + if expected_dim != tools.unit.NAMES_AND_DIMENSIONS["nodim"]: + raisedimerror_receivedfloat(expected_dim) + return v + + elif isinstance(v, pint.Quantity): + if expected_dim != v.dimensionality: + raisedimerror_receivedincorrect(expected_dim, v.dimensionality) + return v + + elif isinstance(v, pd.Series): + # Is pint-series or float-series. + + if not isinstance(v.dtype, pint_pandas.PintType): + if expected_dim != tools.unit.NAMES_AND_DIMENSIONS["nodim"]: + raisedimerror_receivedfloat(expected_dim) + else: - if not v.pint.dimensionality == dim: - raise DimensionalityError( - dim, - v.pint.dimensionality, - extra_msg=f"Incorrect dimension for this attribute; expected {dim}, got {v.pint.dimensionality}", - ) - # Check if series is standardised. + if expected_dim != v.pint.dimensionality: + raisedimerror_receivedincorrect(expected_dim, v.pint.dimensionality) + try: - tools.standardize.assert_frame_standardized(v) + tools.testing.assert_index_standardized(v.index) except AssertionError as e: - raise ValueError( - "Timeseries not in expected form. See ``portfolyo.standardize()`` for more information." - ) from e + raise ValueError("Timeseries not in expected form.") from e return v @@ -286,7 +264,7 @@ def _unit2attr(unit) -> str: def _from_data( - data: float | tools.unit.Q_ | pd.Series | Dict | pd.DataFrame | Iterable | Mapping, + data: float | pint.Quantity | pd.Series | Dict | pd.DataFrame | Iterable | Mapping, ) -> InOp: """Turn ``data`` into a InterOp object.""" @@ -299,12 +277,12 @@ def _from_data( elif isinstance(data, float): return InOp(nodim=data) - elif isinstance(data, tools.unit.Q_): + elif isinstance(data, pint.Quantity): return InOp(**{_unit2attr(data.units): data}) elif isinstance(data, pd.Series) and isinstance(data.index, pd.DatetimeIndex): # timeseries - data = tools.unit.avoid_frame_of_objects(data) + data = tools.unit.normalize_frame(data) if pd.api.types.is_float_dtype(data): return InOp(nodim=data) else: diff --git a/portfolyo/core/pfline/nested_methods.py b/portfolyo/core/pfline/nested_methods.py index 440294a..6814a0f 100644 --- a/portfolyo/core/pfline/nested_methods.py +++ b/portfolyo/core/pfline/nested_methods.py @@ -66,7 +66,7 @@ def agg(self) -> pd.DataFrame: dfs.append(child.agg().to_frame(name).T) else: dfs.append(tools.frame.add_header(child.agg(), name, 0)) - return tools.unit.avoid_frame_of_objects(tools.frame.concat(dfs)) + return tools.unit.normalize_frame(tools.frame.concat(dfs)) class LocIndexer: diff --git a/portfolyo/core/pfline/text.py b/portfolyo/core/pfline/text.py index a8c196a..26b389d 100644 --- a/portfolyo/core/pfline/text.py +++ b/portfolyo/core/pfline/text.py @@ -8,6 +8,7 @@ from ..shared import text as shared_text from . import classes from .enums import Kind +from ... import tools if TYPE_CHECKING: from .classes import PfLine @@ -28,15 +29,17 @@ def _children_info(pfl: PfLine) -> Iterable[str]: return [". Children: " + ("none" if not childtxt else ", ".join(childtxt))] -def _flatdatablock(pfl: PfLine, cols: Iterable[str], num_of_ts: int) -> Iterable[str]: +def _flatdatablock( + pfl: PfLine, cols_and_units: dict[str, tools.unit.Unit], num_of_ts: int +) -> Iterable[str]: """The timestamps and data to be shown in a block, next to the tree.""" # Obtain dataframe with index = timestamp as string and columns = one or more of 'qwpr'. - df = pfl.df[list(cols)] + df = pfl.df[cols_and_units.keys()] # . (roughly) reduce number of timestamps to increase speed of conversion to strings. if len(df.index) > num_of_ts * 2: df = pd.concat([df.iloc[:num_of_ts, :], df.iloc[-num_of_ts:, :]], axis=0) # . turn values into strings. - df = shared_text.df_with_strvalues(df) + df = shared_text.df_with_strvalues(df, cols_and_units) # . turn index into strings and reduce to wanted number of datapoints df = shared_text.df_with_strindex(df, num_of_ts) # . column withs @@ -47,7 +50,7 @@ def _flatdatablock(pfl: PfLine, cols: Iterable[str], num_of_ts: int) -> Iterable def _childrenlines( - pfl: PfLine, cols: Iterable[str], num_of_ts: int, depth: int + pfl: PfLine, cols_and_units: dict[str, tools.unit.Unit], num_of_ts: int, depth: int ) -> Iterable[str]: """Treeview of only the children.""" out = [] @@ -56,7 +59,9 @@ def _childrenlines( for c, (name, child) in enumerate(pfl.items()): is_last, is_only = (c == len(pfl) - 1), (len(pfl) == 1) out.extend( - nestedtree(name, child, cols, num_of_ts, depth + 1, is_last, is_only) + nestedtree( + name, child, cols_and_units, num_of_ts, depth + 1, is_last, is_only + ) ) return out @@ -67,7 +72,7 @@ def _childrenlines( def nestedtree( name: str, pfl: PfLine, - cols: Iterable[str], + cols_and_units: dict[str, tools.unit.Unit], num_of_ts: int, depth: int = 0, is_last: bool = True, @@ -82,28 +87,36 @@ def nestedtree( if is_only and depth > 0: txtlines = ["(only contributor to parent data; has same values)"] else: - txtlines = _flatdatablock(pfl, cols, num_of_ts) + txtlines = _flatdatablock(pfl, cols_and_units, num_of_ts) for txtline in txtlines: out.append(tree["10"] + tree["11"] + colorama.Style.RESET_ALL + txtline) # Add children if any. - for txtline in _childrenlines(pfl, cols, num_of_ts, depth): + for txtline in _childrenlines(pfl, cols_and_units, num_of_ts, depth): out.append(tree["10"] + txtline) return out -def pfl_as_string(pfl: PfLine, flatten: bool, num_of_ts: int, color: bool) -> str: +def pfl_as_string( + pfl: PfLine, + flatten: bool, + num_of_ts: int, + color: bool, +) -> str: + cols_and_units = shared_text.cols_and_units(pfl) lines = [f"PfLine object with {_what(pfl)} information."] lines.extend(shared_text.index_info(pfl.index)) if isinstance(pfl, classes.NestedPfLine): lines.extend(_children_info(pfl)) cols = pfl.kind.available if flatten: - lines.extend(shared_text.dataheader(cols)) + lines.extend(shared_text.dataheader(cols_and_units)) lines.extend([""]) - lines.extend(_flatdatablock(pfl, cols, num_of_ts)) + lines.extend(_flatdatablock(pfl, cols_and_units, num_of_ts)) else: spaces = " " * (shared_text.MAX_DEPTH + 5) - lines.extend([spaces + txtline for txtline in shared_text.dataheader(cols)]) + lines.extend( + [spaces + txtline for txtline in shared_text.dataheader(cols_and_units)] + ) lines.extend(nestedtree("(this pfline)", pfl, cols, num_of_ts)) txt = "\n".join(lines) return txt if color else shared_text.remove_color(txt) diff --git a/portfolyo/core/pfstate/text.py b/portfolyo/core/pfstate/text.py index 0c70036..3a0606a 100644 --- a/portfolyo/core/pfstate/text.py +++ b/portfolyo/core/pfstate/text.py @@ -10,10 +10,15 @@ def pfs_as_string(pfs: PfState, num_of_ts: int, color: bool) -> str: + cols_and_units = shared_text.FALLBACKUNITS | shared_text.cols_and_units( + pfs.offtakevolume, pfs.unsourcedprice, pfs.sourced + ) # ensure we have all of wqpr lines = ["PfState object."] lines.extend(shared_text.index_info(pfs.index)) spaces = " " * (shared_text.MAX_DEPTH + 5) - lines.extend([spaces + txtline for txtline in shared_text.dataheader("wqpr")]) + lines.extend( + [spaces + txtline for txtline in shared_text.dataheader(cols_and_units)] + ) lines.extend(pfline_text.nestedtree("offtake", pfs.offtakevolume, "wq", num_of_ts)) lines.extend(pfline_text.nestedtree("pnl_cost", pfs.pnl_cost, "wqpr", num_of_ts)) txt = "\n".join(lines) diff --git a/portfolyo/core/shared/text.py b/portfolyo/core/shared/text.py index 91332f8..958ff05 100644 --- a/portfolyo/core/shared/text.py +++ b/portfolyo/core/shared/text.py @@ -9,13 +9,29 @@ COLORS = ["WHITE", "YELLOW", "CYAN", "GREEN", "RED", "BLUE", "MAGENTA", "BLACK"] TREECOLORS = [colorama.Style.BRIGHT + getattr(colorama.Fore, f) for f in COLORS] -_UNITS = {"w": "MW", "q": "MWh", "p": "Eur/MWh", "r": "Eur"} +FALLBACKUNITS = { + c: tools.unit.Unit(u) + for c, u in {"w": "MW", "q": "MWh", "p": "Eur/MWh", "r": "Eur"}.items() +} VALUEFORMAT = {"w": "{:,.1f}", "q": "{:,.0f}", "p": "{:,.2f}", "r": "{:,.0f}"} DATETIMEFORMAT = "%Y-%m-%d %H:%M:%S %z" COLWIDTHS = {"ts": 25, "w": 12, "q": 11, "p": 11, "r": 13} MAX_DEPTH = 6 +def cols_and_units(*objs) -> dict[str, tools.unit.Unit]: + """Return dictionary with all columns found in the .df property of the objects, plus their units.""" + units = {} + for obj in objs: + for c, pu in obj.df.dtypes.items(): + units[c] = ( + tools.unit.ureg.Eur + if pu.units.dimensionality == "[currency]" + else pu.units + ) + return units + + def remove_color(text: str) -> str: """Remove all color from text.""" for color in [colorama.Style.RESET_ALL, *TREECOLORS]: @@ -23,14 +39,14 @@ def remove_color(text: str) -> str: return text -def df_with_strvalues(df: pd.DataFrame, units: Dict = _UNITS): +def df_with_strvalues(df: pd.DataFrame, cols_and_units: dict[str, tools.unit.Unit]): """Turn dataframe with single column names ('w', 'p', etc) into text strings.""" if isinstance(df.columns, pd.MultiIndex): raise ValueError("Dataframe must have single column index; has MultiIndex.") str_series = {} for name, s in df.items(): - sin = s.pint.to(units.get(name)).pint.magnitude - formt = VALUEFORMAT.get(name).format + sin = s.pint.to(cols_and_units[str(name)]).pint.magnitude + formt = VALUEFORMAT[str(name)].format sout = sin.apply(formt).str.replace(",", " ", regex=False) str_series[name] = sout.mask(s.isna(), "") return pd.DataFrame(str_series) @@ -50,9 +66,10 @@ def df_with_strindex(df: pd.DataFrame, num_of_ts: int): def index_info(i: pd.DatetimeIndex) -> Iterable[str]: """Info about the index.""" + end = tools.right.stamp(i[-1], i.freq) return [ - f". Start: {i[0] } (incl) . Timezone : {i.tz or 'none'} ", - f". End : {tools.right.index(i)[-1]} (excl) . Start-of-day: {i[0].time()} ", + f". Start: {i[0] } (incl) . Timezone : {i.tz or 'none'} ", + f". End : {end } (excl) . Start-of-day: {i[0].time()} ", f". Freq : {i.freq} ({len(i)} datapoints)", ] @@ -75,10 +92,10 @@ def treedict(depth: int, is_last_child: bool, has_children: bool) -> Dict[str, s return tree -def dataheader(cols: Iterable[str] = "wqpr", units: Dict = _UNITS) -> Iterable[str]: - out = [" " * 25] * 2 # width of timestamps - for c in cols: +def dataheader(cols_and_units: Dict[str, tools.unit.Unit]) -> Iterable[str]: + out = [" " * COLWIDTHS["ts"]] * 2 # width of timestamps + for c, units in cols_and_units.items(): width = COLWIDTHS[c] + 1 out[0] += f"{c:>{width}}" - out[1] += f"{units[c]:>{width}}" + out[1] += f"{f'{units:~P}':>{width}}" # ~P for compact return out diff --git a/portfolyo/tools/characterize.py b/portfolyo/tools/characterize.py new file mode 100644 index 0000000..6259f21 --- /dev/null +++ b/portfolyo/tools/characterize.py @@ -0,0 +1,71 @@ +"""Module to characterize values. Only considering following types: +float, int, pint.Quantity, pandas.Series, pandas.DataFrame.""" + +# Terminology: +# +# We distinguish the following objects (mutually exclusive): +# Values (single) After tools_unit.normalize +# --------------- +# float float +# int float +# dimensionless Quantity float +# dimensional Quantity dimensional Quantity +# +# 1D-values +# --------- +# float-Series float-Series +# int-Series float-Series +# dimensionless pint-Series float-Series +# dimensional pint-Series dimensional pint-Series +# Series of pint-quantities of uniform dimensionality dimensional pint-Series +# Series of pint-quantities of non-uniform dimensionality Series of pint-quantities of non-uniform dimensionality +# +# 2D-values +# --------- +# DataFrame + + +import pint +import pandas as pd +import pint_pandas + +from . import unit as tools_unit + + +# Timeseries + + +def is_timeframe(v: pd.Series | pd.DataFrame) -> bool: + """True if Series or Dataframe with datetimeindex with frequency set. False if not DatetimeIndex. Error otherwise.""" + if not isinstance(v.index, pd.DatetimeIndex): + return False + elif v.index.freq is not None: + return True + raise ValueError("Found DatetimeIndex without frequency.") + + +# Dimensionality. + + +def has_uniform_dimensionality(v: pd.Series) -> bool: + """True if pint-Series, or if Series of quantities with same dimension.""" + if isinstance(v.dtype, pint_pandas.PintType): + return True # pintseries + elif pd.api.types.is_numeric_dtype(v.dtype): # check for PintType first! + return True # floatseries + else: + try: + _ = v.pint.convert_object_dtype() + except (pint.DimensionalityError, AttributeError): + return False + return True + + +def dimensionality(v: tools_unit.ALLOWED_TYPES) -> pint.util.UnitsContainer: + v = tools_unit.normalize(v) + if isinstance(v, float): + return tools_unit.ureg.get_dimensionality("[]") + elif isinstance(v, pint.Quantity): + return v.dimensionality + elif isinstance(v, pd.Series): + pass diff --git a/portfolyo/tools/freq.py b/portfolyo/tools/freq.py index 853f5ec..2cf924e 100644 --- a/portfolyo/tools/freq.py +++ b/portfolyo/tools/freq.py @@ -30,15 +30,14 @@ ] -def assert_freq_valid(freq: str) -> None: +def assert_freq_valid(freq: str | pd.offsets.BaseOffset) -> None: """ Validate if the given frequency string is allowed based on pandas offset objects. - Parameters: - freq (str): A string representing a frequency alias (e.g., "YS", "QS", "MS"). - - Raises: - ValueError: If the frequency is not allowed. + Parameters + ---------- + freq + Frequency, or frequency alias (e.g., "YS", "QS", "MS"). """ freq_offset = pd.tseries.frequencies.to_offset(freq) @@ -47,20 +46,20 @@ def assert_freq_valid(freq: str) -> None: # Check if the MRO is in the list of allowed MROs # have to make sure it's only the first class on the list if mro_class not in ALLOWED_CLASSES: - raise ValueError(f"The passed frequency '{freq}' is not allowed.") + raise AssertionError(f"The passed frequency '{freq}' is not allowed.") # Define restricted classes that should have n == 1 restricted_classes = { - pd.tseries.offsets.MonthBegin: 1, - pd.tseries.offsets.Day: 1, - pd.tseries.offsets.Hour: 1, - pd.tseries.offsets.Minute: 15, + pd.offsets.MonthBegin: 1, + pd.offsets.Day: 1, + pd.offsets.Hour: 1, + pd.offsets.Minute: 15, } allowed_n = restricted_classes.get(type(freq_offset)) if allowed_n is not None: # case where freq is not in restricted class # Check if freq_offset.n is not None and if it doesn't match allowed_n if freq_offset.n is None or freq_offset.n != allowed_n: - raise ValueError(f"The passed frequency {freq} is not allowed.") + raise AssertionError(f"The passed frequency {freq} is not allowed.") def up_or_down(freq_source: str, freq_target: str) -> int: diff --git a/portfolyo/tools/standardize.py b/portfolyo/tools/standardize.py index 487fcc8..b4c6320 100644 --- a/portfolyo/tools/standardize.py +++ b/portfolyo/tools/standardize.py @@ -122,7 +122,7 @@ def frame( raise ValueError("Could not standardize this frame") from e # Standardize index name. - fr = _standardize_index_name(fr) + fr = _fix_index_name(fr) # After standardizing timezone, the frequency should have been set. fr = tools_freq.set_to_frame(fr, freq_input) tools_freq.assert_freq_valid(fr.index.freq) @@ -152,7 +152,7 @@ def _fix_timezone(fr, force, tz, floating): ) -def _standardize_index_name(fr: Series_or_DataFrame) -> Series_or_DataFrame: +def _fix_index_name(fr: Series_or_DataFrame) -> Series_or_DataFrame: return fr.rename_axis(index="ts_left") @@ -161,8 +161,9 @@ def assert_frame_standardized(fr: Series_or_DataFrame) -> None: assert_index_standardized(fr.index) -def assert_index_standardized(i: pd.DatetimeIndex, __right: bool = False): - """Assert that index is standardized.""" +def assert_index_standardized(i: pd.DatetimeIndex) -> None: + """Assert that index is standardized: by checking if the frequency fits to the minute/second of the timestamps, the + day of the month, etc.""" if not isinstance(i, pd.DatetimeIndex): raise AssertionError(f"Expecting DatetimeIndex; got {type(i)}.") @@ -171,10 +172,6 @@ def assert_index_standardized(i: pd.DatetimeIndex, __right: bool = False): freq = i.freq if not freq: raise AssertionError("Index must have frequency set.") - # if freq not in (freqs := tools_freq.FREQUENCIES): - # raise AssertionError( - # f"Index frequency must be one of {', '.join(freqs)}; found '{freq}'." - # ) tools_freq.assert_freq_valid(freq) # Check length. @@ -183,11 +180,9 @@ def assert_index_standardized(i: pd.DatetimeIndex, __right: bool = False): # Check hour and minute. if tools_freq.up_or_down(freq, "15min") <= 0: # quarterhour - startminute = 15 if __right else 0 - if i[0].minute != startminute: - err = ("right-bound", "15 min past the") if __right else ("", "at a full") + if i[0].minute != 0: raise AssertionError( - f"The first element in an index with {err[0]} quarterhourly values must be {err[1]} hour; found {i[0]}." + f"The first element in an index with quarterhourly values must be at the full hour; found {i[0]}." ) if any(not_ok := [ts.minute not in (0, 15, 30, 45) for ts in i]): @@ -204,12 +199,7 @@ def assert_index_standardized(i: pd.DatetimeIndex, __right: bool = False): # Check time-of-day. if tools_freq.up_or_down(freq, "h") <= 0: # hour or shorter - if not __right: - start = i[0] - end = tools_right.stamp(i[-1], i.freq) - else: - start = tools_righttoleft.index(i)[0] - end = i[-1] + start, end = i[0], tools_right.stamp(i[-1], i.freq) if start.time() != end.time(): raise AssertionError( "An index must contain full days. For hourly-or-shorter values, this means " diff --git a/portfolyo/tools/testing.py b/portfolyo/tools/testing.py index a2587ab..db89d14 100644 --- a/portfolyo/tools/testing.py +++ b/portfolyo/tools/testing.py @@ -9,6 +9,9 @@ import pint from . import unit as tools_unit +from . import standardize as tools_standardize + +ALLOWED_TYPES = int | float | pint.Quantity | pd.Series | pd.DataFrame def assert_value_equal(left: Any, right: Any): @@ -20,20 +23,6 @@ def assert_value_equal(left: Any, right: Any): raise AssertionError from e -@functools.wraps(pd.testing.assert_frame_equal) -def assert_frame_equal(left: pd.DataFrame, right: pd.DataFrame, *args, **kwargs): - # Dataframes equal even if *order* of columns is not the same. - left = left.sort_index(axis=1) - right = right.sort_index(axis=1) - assert set(left.columns) == set(right.columns) - - for (coll, sl), (colr, sr) in zip(left.items(), right.items()): - # Names must match. - assert coll == colr - # Series must match. - assert_series_equal(sl, sr, *args, **kwargs) - - @functools.wraps(pd.testing.assert_series_equal) def assert_series_equal(left: pd.Series, right: pd.Series, *args, **kwargs): if pd.api.types.is_float_dtype(left) or pd.api.types.is_integer_dtype(left): @@ -53,7 +42,7 @@ def assert_series_equal(left: pd.Series, right: pd.Series, *args, **kwargs): rightm = right.pint.magnitude.replace([np.inf, -np.inf], np.nan) pd.testing.assert_series_equal(leftm, rightm, *args, **kwargs) - elif pd.api.types.is_object_dtype(left) and isinstance(left.iloc[0], tools_unit.Q_): + elif pd.api.types.is_object_dtype(left) and isinstance(left.iloc[0], pint.Quantity): # series of quantities? leftm = left.apply(lambda q: q.magnitude).replace([np.inf, -np.inf], np.nan) leftu = left.apply(lambda q: q.units) @@ -68,6 +57,20 @@ def assert_series_equal(left: pd.Series, right: pd.Series, *args, **kwargs): pd.testing.assert_series_equal(left, right, *args, **kwargs) +@functools.wraps(pd.testing.assert_frame_equal) +def assert_dataframe_equal(left: pd.DataFrame, right: pd.DataFrame, *args, **kwargs): + # Dataframes equal even if *order* of columns is not the same. + left = left.sort_index(axis=1) + right = right.sort_index(axis=1) + assert set(left.columns) == set(right.columns) + + for (coll, sl), (colr, sr) in zip(left.items(), right.items()): + # Names must match. + assert coll == colr + # Series must match. + assert_series_equal(sl, sr, *args, **kwargs) + + assert_index_equal = pd.testing.assert_index_equal @@ -81,6 +84,43 @@ def assert_indices_compatible(left: pd.DatetimeIndex, right: pd.DatetimeIndex): raise AssertionError(f"Indices that have unequal timezone; {lz} and {rz}.") +# Characterizing input data. + + +assert_index_standardized = tools_standardize.assert_index_standardized +assert_frame_standardized = tools_standardize.assert_frame_standardized + + +def assert_allowed_type(v: Any) -> None: + if not isinstance(v, ALLOWED_TYPES): + raise AssertionError(f"Unexpected type: {type(v)}.") + + +def order(v: ALLOWED_TYPES) -> int: + """Return 0 if ``v`` is float, int, or Quantity. Return 1 if ``v`` is a Series. Return 2 if ``v`` is a DataFrame.""" + if isinstance(v, float | int | pint.Quantity): + return 0 + elif isinstance(v, pd.Series): + return 1 + elif isinstance(v, pd.DataFrame): + return 2 + + +def is_order_0(v: ALLOWED_TYPES) -> bool: + return isinstance(v, float | int | pint.Quantity) + + +def is_order_1(v: ALLOWED_TYPES) -> bool: + return isinstance(v, pd.Series) + + +def is_order_2(v: ALLOWED_TYPES) -> bool: + return isinstance(v, pd.DataFrame) + + +# Comparing energy, power, price, revenue. + + def assert_w_q_compatible(freq: str, w: pd.Series, q: pd.Series): """Assert that timeseries with power- and energy-values are consistent.""" if freq == "15min": diff --git a/portfolyo/tools/unit.py b/portfolyo/tools/unit.py index 38d9644..d3185ae 100644 --- a/portfolyo/tools/unit.py +++ b/portfolyo/tools/unit.py @@ -1,21 +1,19 @@ -""" -Working with pint units. -""" +"""Working with pint units.""" from pathlib import Path + from .types import Series_or_DataFrame import pandas as pd import pint import pint_pandas -import numpy as np path = Path(__file__).parent / "unitdefinitions.txt" ureg = pint_pandas.PintType.ureg = pint.UnitRegistry( str(path), - system="powerbase", auto_reduce_dimensions=True, + autoconvert_to_preferred=True, case_sensitive=False, ) ureg.formatter.default_format = "~P" # short by default @@ -33,12 +31,15 @@ "p": ureg.get_dimensionality("[currency]/[energy]"), "r": ureg.get_dimensionality("[currency]"), "duration": ureg.get_dimensionality("[time]"), - "t": ureg.get_dimensionality("temperature"), - "nodim": ureg.get_dimensionality("dimensionless"), + "t": ureg.get_dimensionality("[temperature]"), + "nodim": ureg.get_dimensionality("[]"), } +# DEFAULT_PREFERRED = [ureg.Eur] +# ureg.default_preferred_units = DEFAULT_PREFERRED + -def to_name(arg: Unit | Q_) -> str: +def to_name(arg: pint.Unit | pint.Quantity) -> str: """Find the standard column name belonging to ``arg``, which can be a unit or a quantity. Checks on dimensionality, not exact unit.""" for name, dim in NAMES_AND_DIMENSIONS.items(): @@ -89,74 +90,104 @@ def to_name(arg: Unit | Q_) -> str: # -def avoid_frame_of_objects( +def normalize_value(v: float | int | pint.Quantity) -> float | pint.Quantity: + """Ensure a value is a float or a (non-dimensionless) quantity. + + Parameters + ---------- + v + Input value to normalize. + + Returns + ------- + Normalized value. + """ + if isinstance(v, int): + return float(v) + + elif isinstance(v, float): + return v + + elif isinstance(v, pint.Quantity): + float_magnitude = float(v.magnitude) + if v.dimensionless: + return float_magnitude + return Q_(float_magnitude, v.units) + raise TypeError(f"Expected float, int, or pint Quantity; got {type(v)}.") + + +def normalize_frame( fr: Series_or_DataFrame, strict: bool = True ) -> Series_or_DataFrame: - """Ensure a Series or Dataframe does not have objects as its values, - if possible. + """Ensure a Series (or each Series in a Dataframe) is a float-series or a (non-dimensionless) pint-Series, if possible. Parameters: ----------- - fr : Series_or_DataFrame + fr The input data structure, which can be either a pandas Series or DataFrame. - Expected int-Series, float-Series, pint-Series, or Series of pint quantities (of equal dimensionality). + Expect int-Series, float-Series, pint-Series, or Series of pint quantities (of equal dimensionality). + strict, optional (default: True) + If True, raises Error if ``fr`` cannot be converted into a frame without objects. Returns: -------- - Series_or_DataFrame The transformed data structure. """ if isinstance(fr, pd.DataFrame): - return pd.DataFrame({col: avoid_frame_of_objects(s) for col, s in fr.items()}) + return fr.apply(normalize_frame, axis=1) # fr is now a Series. if pd.api.types.is_integer_dtype(fr): + # Int-series --> convert to floats. return fr.astype(float) - if pd.api.types.is_float_dtype(fr): + + elif pd.api.types.is_float_dtype(fr): + # Float-series --> return as-is. return fr - if hasattr(fr, "pint"): - if isinstance(fr.dtype, pint_pandas.PintType): - return _normalize_pintseries(fr) - else: # we MAY have a series of pint quantities. Convert to pint-series, if possible - return _normalize_pintobjects(fr, strict) + + elif isinstance(fr.dtype, pint_pandas.PintType): + # Pint-series --> return as floats or pint-series (with float magnitude). + float_magnitudes = fr.pint.magnitude.astype(float) + if fr.pint.dimensionless: + return float_magnitudes + return float_magnitudes.astype(f"pint[{fr.pint.units}]") + + elif hasattr(fr, "pint"): + # Series of pint quantities. MAY be uniform dimension. + try: + pintseries = fr.pint.convert_object_dtype() # may have int magnitudes + except pint.DimensionalityError as e: + if not strict: + return fr + raise e + + # handle int-magnitudes and/or dimensionless data + return normalize_frame(pintseries) + raise TypeError( "Expected int-Series, float-Series, pint-Series, or Series of pint quantities (of equal dimensionality)." ) -def _normalize_pintseries(s: pd.Series) -> pd.Series: - float_magnitudes = s.pint.magnitude.astype(float) - if s.pint.dimensionless: - return float_magnitudes - return float_magnitudes.astype(f"pint[{s.pint.units}]") - - -def _normalize_pintobjects(s: pd.Series, strict: bool) -> pd.Series: - # If we have a series of quantities (and nan-values), convert to pint-series if possible. - is_q = s.apply(lambda v: isinstance(v, Q_)) - if not any(is_q): - if not strict: - return s - raise ValueError("Expected at least one quantity.") +def normalize( + v: float | int | pint.Quantity | pd.Series | pd.DataFrame, strict: bool = True +) -> float | pint.Quantity | pd.Series | pd.DataFrame: + """Ensure dimensionless values are floats (or float-series), and that other values are quantities (or pint-Series) with float magnitudes. - is_q_or_nan = s.apply(lambda v: isinstance(v, Q_) or np.isnan(v)) - if not all(is_q_or_nan): - if not strict: - return s - raise ValueError("Expected only quantities (and np.nan).") + Parameters + ---------- + v + Input data to normalize + strict, optional (default: True) + If True, raises Error if Series cannot be converted into a series without pint quantities. - units = {v.units for v in s.loc[is_q].values} - dims = {u.dimensionality for u in units} - - if len(dims) > 1: - if not strict: - return s - raise ValueError( - f"Expected a Series with quantities of the same dimension; got {dims}." - ) - - # All values are quantities of the same dimension. - - return s.astype(f"pint[{units.pop()}]") + Returns + ------- + Normalized data. + """ + if isinstance(v, float | int | pint.Quantity): + return normalize_value(v) + else: + return normalize_frame(v, strict) diff --git a/portfolyo/tools/unitdefinitions.txt b/portfolyo/tools/unitdefinitions.txt index 7636670..759faee 100644 --- a/portfolyo/tools/unitdefinitions.txt +++ b/portfolyo/tools/unitdefinitions.txt @@ -44,10 +44,3 @@ euro_per_MWh = euro / (1_000_000 * watthour) = Eur/MWh cent_per_kWh = (euro / 100) / (1000 * watthour) -@system energybase - megawatthour -@end - -@system powerbase - megawatt:watthour -@end diff --git a/portfolyo/tools/wavg.py b/portfolyo/tools/wavg.py index 00db2ba..ddfb0ea 100644 --- a/portfolyo/tools/wavg.py +++ b/portfolyo/tools/wavg.py @@ -104,7 +104,7 @@ def general( def unweighted(s: pd.Series) -> float | tools_unit.Q_: - s = tools_unit.avoid_frame_of_objects(s) + s = tools_unit.normalize_frame(s) return s.mean() @@ -119,7 +119,7 @@ def unweighted_subset(s: pd.Series) -> float | tools_unit.Q_: def nanunlessuniform(s: pd.Series) -> float | tools_unit.Q_: if any(s.isna()): return np.nan - s = tools_unit.avoid_frame_of_objects(s) + s = tools_unit.normalize_frame(s) if not values_are_uniform(s): return np.nan return s.iloc[0] @@ -138,7 +138,7 @@ def wavg_subset(s: pd.Series) -> float | tools_unit.Q_: s = s.loc[factors.index] if any(s.isna()): return np.nan - s = tools_unit.avoid_frame_of_objects(s) + s = tools_unit.normalize_frame(s) return sum(s * factors) return wavg_subset @@ -283,7 +283,7 @@ def dataframe( # Transposing loses some properties, like .index.freq if axis == 1 and isinstance(result.index, pd.DatetimeIndex): result = tools_freq.guess_to_frame(result) - return tools_unit.avoid_frame_of_objects(result, False) + return tools_unit.normalize_frame(result, False) def weights_as_floatseries( @@ -299,7 +299,7 @@ def weights_as_floatseries( else: raise TypeError("``weights`` must be iterable or mapping.") # Step 2: avoid Series of Quantity-objects (convert to pint-series instead). - weightseries = tools_unit.avoid_frame_of_objects(weightseries) + weightseries = tools_unit.normalize_frame(weightseries) # Step 3: keep magnitude only. if isinstance(weightseries.dtype, pint_pandas.PintType): weightseries = weightseries.pint.magnitude diff --git a/pyproject.toml b/pyproject.toml index dc983b4..d64d2f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ readme = "README.rst" # dependencies [tool.poetry.dependencies] -python = "^3.10" +python = "^3.10,<3.13" # <3.13 because of mip. check if can be removed once mip gets updated pandas = "^2.2" matplotlib = "^3.7.2" pint = "^0.24" @@ -21,6 +21,7 @@ pint-pandas = "^0.6.2" colorama = "^0.4.6" numpy = "^1.26.2" tomli = "^2.0" +mip = "^1.15.0" [tool.poetry.group.test.dependencies] codecov = "^2.1.13" diff --git a/tests/core/pfline/test_flat_helper.py b/tests/core/pfline/test_flat_helper.py index 8f43ab5..e2786af 100644 --- a/tests/core/pfline/test_flat_helper.py +++ b/tests/core/pfline/test_flat_helper.py @@ -34,7 +34,7 @@ def test_makedataframe_freqtz(freq, tz): expected = pd.DataFrame({"q": q, "w": w}) expected.index.freq = freq - testing.assert_frame_equal(result1, expected, check_names=False) + testing.assert_dataframe_equal(result1, expected, check_names=False) i = pd.date_range("2020-01-01", freq="MS", periods=2) @@ -76,7 +76,7 @@ def test_makedataframe_inputtypes(data: Any, expected: pd.DataFrame | type): _ = flat_helper._dataframe(data) return result = flat_helper._dataframe(data) - testing.assert_frame_equal(result, expected) + testing.assert_dataframe_equal(result, expected) TESTCASES_COLUMNS = [ @@ -162,7 +162,7 @@ def test_makedataframe_consistency(tz, freq, columns, inputtype): expected["p"] = expected.r / expected.q expected["w"] = expected.q / expected.index.duration - testing.assert_frame_equal(result, expected) + testing.assert_dataframe_equal(result, expected) @pytest.mark.parametrize("freq1", ["15min", "MS", "YS"]) # don't do all - many! diff --git a/tests/core/pfline/test_nested_children.py b/tests/core/pfline/test_nested_children.py index 2d357c1..efb86f6 100644 --- a/tests/core/pfline/test_nested_children.py +++ b/tests/core/pfline/test_nested_children.py @@ -130,7 +130,7 @@ def do_set(pfl, name, child): result = pfl assert result == expected - testing.assert_frame_equal(result.df, expected.df) + testing.assert_dataframe_equal(result.df, expected.df) def do_test_dropchild(pfl: PfLine, to_drop: Iterable[str], expected: PfLine, how: str): @@ -153,7 +153,7 @@ def do_drop(pfl, name): result = pfl assert result == expected - testing.assert_frame_equal(result.df, expected.df) + testing.assert_dataframe_equal(result.df, expected.df) @pytest.mark.parametrize( diff --git a/tests/core/pfline/test_pfline_arithmatic_numeric_and_error.py b/tests/core/pfline/test_pfline_arithmatic_numeric_and_error.py index 5b91bf1..3de229c 100644 --- a/tests/core/pfline/test_pfline_arithmatic_numeric_and_error.py +++ b/tests/core/pfline/test_pfline_arithmatic_numeric_and_error.py @@ -561,7 +561,7 @@ def calc(): result = calc() if isinstance(result, pd.DataFrame): - testing.assert_frame_equal(result, tc.expected) + testing.assert_dataframe_equal(result, tc.expected) elif isinstance(result, pd.Series): testing.assert_series_equal(result, tc.expected) else: diff --git a/tests/core/pfline/test_pfline_init.py b/tests/core/pfline/test_pfline_init.py index 3c3fe13..7401604 100644 --- a/tests/core/pfline/test_pfline_init.py +++ b/tests/core/pfline/test_pfline_init.py @@ -236,7 +236,7 @@ def test_init_A( assert isinstance(result, expected_type) if type(itc.data_in) is type(constructor): assert result is itc.data_in # assert no copy but reference. - pf.testing.assert_frame_equal(result_df, itc.expected_df.rename_axis("ts_left")) + pf.testing.assert_dataframe_equal(result_df, itc.expected_df.rename_axis("ts_left")) assert result.kind is itc.expected_kind if expected_type is classes.NestedPfLine: assert len(result) diff --git a/tests/core/pfstate/test_pfstate_arithmatic.py b/tests/core/pfstate/test_pfstate_arithmatic.py index 50c1ce2..738653c 100644 --- a/tests/core/pfstate/test_pfstate_arithmatic.py +++ b/tests/core/pfstate/test_pfstate_arithmatic.py @@ -254,6 +254,6 @@ def calc(): # Test correct case. result = calc() if isinstance(expected, pd.DataFrame): - testing.assert_frame_equal(result, expected) + testing.assert_dataframe_equal(result, expected) else: assert result == expected diff --git a/tests/tools/test_changefreq.py b/tests/tools/test_changefreq.py index c249ede..28e5368 100644 --- a/tests/tools/test_changefreq.py +++ b/tests/tools/test_changefreq.py @@ -385,4 +385,4 @@ def do_test( fr = pd.DataFrame({"a": s}) expected = pd.DataFrame({"a": s_expected}) result = fn(fr, freq_target) - testing.assert_frame_equal(result, expected) + testing.assert_dataframe_equal(result, expected) diff --git a/tests/tools/test_frames.py b/tests/tools/test_frames.py index f65a30f..12d843e 100644 --- a/tests/tools/test_frames.py +++ b/tests/tools/test_frames.py @@ -121,7 +121,7 @@ def test_addheader_torows(df_index, header, expected_index): def test_concat(dfs, axis, expected): """Test if concatenation works as expected.""" result = tools.frame.concat(dfs, axis) - testing.assert_frame_equal(result, expected) + testing.assert_dataframe_equal(result, expected) vals1 = np.array([1, 2.0, -4.1234, 0]) diff --git a/tests/tools/test_intersect.py b/tests/tools/test_intersect.py index b425c18..c2d5d84 100644 --- a/tests/tools/test_intersect.py +++ b/tests/tools/test_intersect.py @@ -256,4 +256,4 @@ def do_test_intersect_frame( if isinstance(result, pd.Series): testing.assert_series_equal(result, expected, **kwargs) else: - testing.assert_frame_equal(result, expected, **kwargs) + testing.assert_dataframe_equal(result, expected, **kwargs) diff --git a/tests/tools/test_intersect_flex_frame.py b/tests/tools/test_intersect_flex_frame.py index 9f0d5b6..606ad43 100644 --- a/tests/tools/test_intersect_flex_frame.py +++ b/tests/tools/test_intersect_flex_frame.py @@ -38,8 +38,8 @@ def test_frames_ignore_tz(types: str, ignore_tz: bool): return exp_a, exp_b = pd.DataFrame({"col_a": exp_a}), pd.DataFrame({"col_b": exp_b}) result_a, result_b = tools.intersect.frames(a, b, ignore_tz=ignore_tz) - testing.assert_frame_equal(result_a, exp_a) - testing.assert_frame_equal(result_b, exp_b) + testing.assert_dataframe_equal(result_a, exp_a) + testing.assert_dataframe_equal(result_b, exp_b) @pytest.mark.parametrize("types", ["series", "df"]) @@ -81,8 +81,8 @@ def test_frames_ignore_start_of_day(types: str, ignore_start_of_day: bool): result_a, result_b = tools.intersect.frames( a, b, ignore_start_of_day=ignore_start_of_day ) - testing.assert_frame_equal(result_a, exp_a) - testing.assert_frame_equal(result_b, exp_b) + testing.assert_dataframe_equal(result_a, exp_a) + testing.assert_dataframe_equal(result_b, exp_b) @pytest.mark.parametrize("types", ["series", "df"]) @@ -114,8 +114,8 @@ def test_frames_ignore_freq(types: str, ignore_freq: bool): return exp_a, exp_b = pd.DataFrame({"col_a": exp_a}), pd.DataFrame({"col_b": exp_b}) result_a, result_b = tools.intersect.frames(a, b, ignore_freq=ignore_freq) - testing.assert_frame_equal(result_a, exp_a) - testing.assert_frame_equal(result_b, exp_b) + testing.assert_dataframe_equal(result_a, exp_a) + testing.assert_dataframe_equal(result_b, exp_b) @pytest.mark.parametrize("types", ["series", "df"]) @@ -171,5 +171,5 @@ def test_frames_ignore_all(types: str, ignore_all: bool): result_a, result_b = tools.intersect.frames( a, b, ignore_freq=True, ignore_start_of_day=True, ignore_tz=True ) - testing.assert_frame_equal(result_a, exp_a) - testing.assert_frame_equal(result_b, exp_b) + testing.assert_dataframe_equal(result_a, exp_a) + testing.assert_dataframe_equal(result_b, exp_b) diff --git a/tests/tools/test_peakconvert.py b/tests/tools/test_peakconvert.py index fdcc07d..b678a45 100644 --- a/tests/tools/test_peakconvert.py +++ b/tests/tools/test_peakconvert.py @@ -37,7 +37,7 @@ def test_completebpoframe_averagable(bpoframe, testcol: str, withunits: str): bpoframe = bpoframe.astype("pint[Eur/MWh]") df = bpoframe.drop(columns=testcol) result = tools.peakconvert.complete_bpoframe(df, f_germanpower, is_summable=False) - tools.testing.assert_frame_equal(result, bpoframe) + tools.testing.assert_dataframe_equal(result, bpoframe) @pytest.mark.parametrize("withunits", ["units", "nounits"]) @@ -69,7 +69,7 @@ def test_completebpoframe_summable(bpoframe, testcol: str, withunits: str): bpoframe = bpoframe.astype("pint[Eur/MWh]") df = bpoframe.drop(columns=testcol) result = tools.peakconvert.complete_bpoframe(df, f_germanpower, is_summable=True) - tools.testing.assert_frame_equal(result, bpoframe) + tools.testing.assert_dataframe_equal(result, bpoframe) @pytest.mark.parametrize("tz", [None, "Europe/Berlin"]) @@ -116,4 +116,4 @@ def test_moreconversions_averagable( # Do testing. result = tools.peakconvert.complete_bpoframe(df, f_germanpower, is_summable=False) - tools.testing.assert_frame_equal(result, expected) + tools.testing.assert_dataframe_equal(result, expected) diff --git a/tests/tools/test_trim.py b/tests/tools/test_trim.py index f01183d..99dac85 100644 --- a/tests/tools/test_trim.py +++ b/tests/tools/test_trim.py @@ -375,7 +375,7 @@ def do_test_dataframe( expected = pd.DataFrame({"a": expected_series(i, i_expected, dtype)}) result = tools.trim.frame(fr, trimfreq) - testing.assert_frame_equal(result, expected) + testing.assert_dataframe_equal(result, expected) def expected_series(i, i_expected, dtype): diff --git a/tests/tools/test_tzone.py b/tests/tools/test_tzone.py index 214f2e6..ea0f182 100644 --- a/tests/tools/test_tzone.py +++ b/tests/tools/test_tzone.py @@ -170,7 +170,7 @@ def do_test_conversion(aggfreq, tzt_in, tzt_out, seriesordf, conversion_fn): expected = df_out if seriesordf == "df" else df_out["col1"] result = conversion_fn(fr_in) if seriesordf == "df": - testing.assert_frame_equal( + testing.assert_dataframe_equal( result, expected, check_names=False, check_freq=False ) else: diff --git a/tests/tools/test_unit.py b/tests/tools/test_unit.py index e47c01a..3df7a0b 100644 --- a/tests/tools/test_unit.py +++ b/tests/tools/test_unit.py @@ -1,7 +1,10 @@ import numpy as np import pytest +import pint from portfolyo import tools +import pandas as pd +from portfolyo.tools.testing import assert_series_equal from portfolyo.tools.unit import Q_, ureg UNIT_IDENTITIES = [ @@ -71,3 +74,205 @@ def test_quantities_consistent(units): def test_extended_identities(quants): for q in quants: assert np.isclose(q, quants[0]) + + +# Normalize. + + +@pytest.fixture(params=[10, 10.0, -15, -15.0]) +def intfloat(request): + return request.param + + +@pytest.fixture(params=[[10, 10, -15], [-15.0, 200, -18]]) +def intfloats(request): + return request.param + + +@pytest.fixture(params=["Eur/MWh", "MWh", "tce/h"]) +def units(request): + return request.param + + +@pytest.fixture(params=[["Eur/MWh", "MWh", "tce/h"]]) +def units_mixeddim(request): + return request.param + + +@pytest.fixture(params=[[("MWh", 1), ("GWh", 1000), ("TWh", 1e6)]]) +def units_onedim(request): + return request.param + + +@pytest.fixture( + params=[ + pytest.param(pd.date_range("2020", freq="h", periods=3), id="hourlyindex"), + pytest.param(pd.date_range("2020", freq="MS", periods=3), id="monthlyindex"), + pytest.param([0, 1, 2], id="integerindex"), + ] +) +def index(request): + return request.param + + +@pytest.fixture( + params=[ + pytest.param(tools.unit.normalize_value, id="normalize_value"), + pytest.param(tools.unit.normalize, id="normalize"), + ] +) +def fn_to_test_value(request): + return request.param + + +@pytest.fixture( + params=[pytest.param(True, id="strict"), pytest.param(False, id="notstrict")] +) +def strict(request): + return request.param + + +@pytest.fixture( + params=[ + pytest.param(tools.unit.normalize_frame, id="normalize_frame"), + pytest.param(tools.unit.normalize, id="normalize"), + ] +) +def fn_to_test_frame_part1(request): + return request.param + + +@pytest.fixture +def fn_to_test_frame(fn_to_test_frame_part1, strict): + def partial(fr): + return fn_to_test_frame_part1(fr, strict) + + return partial + + +@pytest.fixture +def floatt(intfloat): + return float(intfloat) + + +@pytest.fixture +def floatts(intfloats): + return [float(v) for v in intfloats] + + +@pytest.fixture +def intfloat_quantity(intfloat, units): + return Q_(intfloat, units) + + +@pytest.fixture +def float_quantity(floatt, units): + return Q_(floatt, units) + + +@pytest.fixture +def intfloat_series(intfloats, index): + return pd.Series(intfloats, index) + + +@pytest.fixture +def float_series(floatts, index): + return pd.Series(floatts, index) + + +@pytest.fixture +def quantity_series(intfloats, index, units): + return pd.Series([Q_(v, units) for v in intfloats], index) + + +@pytest.fixture +def dimensionless_quantity_series(intfloats, index): + return pd.Series([Q_(v, "") for v in intfloats], index) + + +@pytest.fixture +def pint_series(floatts, index, units): + return pd.Series(floatts, index).astype(f"pint[{units}]") + + +@pytest.fixture +def onedim_quantity_series(intfloats, index, units_onedim): + return pd.Series([Q_(v, u[0]) for v, u in zip(intfloats, units_onedim)], index) + + +@pytest.fixture +def onedim_pint_series(floatts, index, units_onedim): + return pd.Series([v * u[1] for v, u in zip(floatts, units_onedim)], index).astype( + f"pint[{units_onedim[0][0]}]" + ) + + +@pytest.fixture +def mixeddim_quantity_series(intfloats, index, units_mixeddim): + return pd.Series([Q_(v, u) for v, u in zip(intfloats, units_mixeddim)], index) + + +def test_normalize_value_intfloat(intfloat, floatt, fn_to_test_value): + result = fn_to_test_value(intfloat) + expected = floatt + + assert result == expected + + +def test_normalize_value_quantity(intfloat_quantity, float_quantity, fn_to_test_value): + result = fn_to_test_value(intfloat_quantity) + expected = float_quantity + + assert result == expected + assert result.magnitude == expected.magnitude + assert result.units == expected.units + + +def test_normalize_series_intfloat(intfloat_series, float_series, fn_to_test_frame): + result = fn_to_test_frame(intfloat_series) + expected = float_series + + assert_series_equal(result, expected) + + +def test_normalize_series_quantities(quantity_series, pint_series, fn_to_test_frame): + result = fn_to_test_frame(quantity_series) + expected = pint_series + + assert_series_equal(result, expected) + + +def test_normalize_series_dimensionlessquantities( + dimensionless_quantity_series, float_series, fn_to_test_frame +): + result = fn_to_test_frame(dimensionless_quantity_series) + expected = float_series + + assert_series_equal(result, expected) + + +def test_normalize_series_onedimquantities( + onedim_quantity_series, onedim_pint_series, fn_to_test_frame +): + result = fn_to_test_frame(onedim_quantity_series) + expected = onedim_pint_series + + assert_series_equal(result, expected) + + +def test_normalize_series_mixeddimquantities( + mixeddim_quantity_series, fn_to_test_frame, strict +): + if strict: + with pytest.raises(pint.DimensionalityError): + _ = fn_to_test_frame(mixeddim_quantity_series) + return + + result = fn_to_test_frame(mixeddim_quantity_series) + expected = mixeddim_quantity_series + + assert_series_equal(result, expected) + + +# TODO: add dataframe tests. +# TODO: add nanvalus