Skip to content

Commit

Permalink
feature: allow passing an explicit input resolution to read_csv (#157)
Browse files Browse the repository at this point in the history
Signed-off-by: F.N. Claessen <[email protected]>
  • Loading branch information
Flix6x authored Dec 11, 2023
1 parent e67f38c commit 4afa0de
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions timely_beliefs/beliefs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
)
from timely_beliefs.sources import utils as source_utils

TimedeltaLike = Union[timedelta, str, pd.Timedelta]


def select_most_recent_belief(
df: "classes.BeliefsDataFrame",
Expand Down Expand Up @@ -534,7 +536,7 @@ def read_csv( # noqa C901
belief_horizon: timedelta = None,
belief_time: datetime = None,
cumulative_probability: float = None,
resample: bool = False,
resample: bool | TimedeltaLike = False,
timezone: Optional[str] = None,
filter_by_column: dict = None,
event_ends_after: datetime = None,
Expand All @@ -559,6 +561,7 @@ def read_csv( # noqa C901
:param belief_time: Optionally, set a specific belief time for the read-in data.
:param cumulative_probability: Optionally, set a specific cumulative probability for the read-in data.
:param resample: Optionally, resample to the event resolution of the sensor.
Set to True to infer the input resolution, or use a timedelta to set it explicitly.
Only implemented for the special read case of 2-column data (see below).
:param timezone: Optionally, localize timezone naive datetimes to a specific timezone.
Accepts IANA timezone names (e.g. UTC or Europe/Amsterdam).
Expand Down Expand Up @@ -675,7 +678,10 @@ def read_csv( # noqa C901
df = df[df["event_start"] < event_starts_before]

if resample:
df = resample_events(df, sensor, keep_nan_values=keep_nan_values)
resolution = resample if not isinstance(resample, bool) else None
df = resample_events(
df, sensor, keep_nan_values=keep_nan_values, resolution=resolution
)

# Apply optionally set belief timing
if belief_horizon is not None and belief_time is not None:
Expand Down Expand Up @@ -810,11 +816,20 @@ def interpret_special_read_cases(


def resample_events(
df: pd.DataFrame, sensor: "classes.Sensor", keep_nan_values: bool
df: pd.DataFrame,
sensor: "classes.Sensor",
keep_nan_values: bool,
resolution: TimedeltaLike | None = None,
) -> pd.DataFrame:
if df.empty:
return df
df = df.set_index("event_start")
if resolution is not None:
resolution = tb_utils.parse_timedelta_like(resolution)
index = initialize_index(
start=df.index[0], end=df.index[-1] + resolution, resolution=resolution
)
df = df.reindex(index)
if df.index.freq is None and len(df) > 2:
# Try to infer the event resolution from the event frequency
df.index.freq = pd.infer_freq(df.index)
Expand Down

0 comments on commit 4afa0de

Please sign in to comment.