Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sofa concept #21

Open
wants to merge 14 commits into
base: dev
Choose a base branch
from
Prev Previous commit
Next Next commit
Add 2 new concepts, pafi and vent_ind
mxrcx committed Jun 11, 2023
commit 52ec0b762f95d8a464770948948685441d7bf1d5
165 changes: 161 additions & 4 deletions src/pyicu/callbacks/concept.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,13 @@
from ..interval import hours
from ..utils import expand
from .misc import collect_concepts
from typing import Union
from pandas._libs.tslibs.timedeltas import Timedelta
from pandas.api.types import is_datetime64_any_dtype
from pyicu.utils import slide
from pyicu.tbl_utils import meta_vars
from pyicu.container.table import rm_cols



def gcs(
@@ -134,10 +141,6 @@ def collect_dots(concepts, interval=None, *args, merge_dat=False):

return res

import pandas as pd
from pandas.api.types import is_datetime64_any_dtype
from pandas.api.types import is_timedelta64_dtype

def check_ival(x, iv):
    """Report whether ``x`` is a data frame that passes the interval check.

    Anything that is not a ``pd.DataFrame`` is rejected. A frame is accepted
    outright when ``is_datetime64_any_dtype(x)`` is false; otherwise the
    verdict of ``has_interval(x, iv)`` is returned.
    """
    if not isinstance(x, pd.DataFrame):
        return False

    # NOTE(review): the dtype predicate is handed the whole frame, not a
    # column; pandas appears to answer False for a DataFrame argument, so
    # has_interval() is presumably never reached -- confirm the intended
    # "is this a time-series table" test.
    if not is_datetime64_any_dtype(x):
        return True

    return has_interval(x, iv)

@@ -175,3 +178,157 @@ def interval(dat):

return ival

def pafi(*args, match_win: Union[int, Timedelta] = Timedelta(hours=2),
         mode: str = "match_vals", fix_na_fio2: bool = True, interval = None) -> pd.DataFrame:
    """
    Calculate the PaO2/FiO2 ratio (Horowitz index).

    Args:
        *args: Concept data forwarded unchanged to ``collect_dots`` for the
            concepts ``po2`` and ``fio2``.
        match_win (int or Timedelta): Maximum time difference between two measurements for calculating their ratio.
        mode (str): Calculation mode. Options: 'match_vals', 'extreme_vals', 'fill_gaps'.
        fix_na_fio2 (bool): Impute missing FiO2 values with 21%.
        interval: Time interval specification, forwarded to ``collect_dots``.

    Returns:
        DataFrame: Rows with both PaO2 and a non-zero FiO2 available, carrying
        a ``pafi`` column (``100 * po2 / fio2``); the ``po2`` and ``fio2``
        columns are removed.

    Raises:
        ValueError: If ``mode`` is not one of the supported options.
        TypeError: If ``fix_na_fio2`` is not a bool.

    Note:
        - 'match_vals' allows a time difference of at most 'match_win' between two measurements for calculating their ratio.
        - 'extreme_vals' uses the worst PaO2 and FiO2 values within the time window specified by 'match_win'.
        - 'fill_gaps' is a variation of 'extreme_vals' that evaluates ratios at every time point specified by 'interval',
          rather than only when a measurement for either PaO2 or FiO2 is available.
        - If 'fix_na_fio2' is True, missing FiO2 values are imputed with 21, the percentage (by volume) of oxygen in air.
    """
    valid_modes = ("match_vals", "extreme_vals", "fill_gaps")

    if mode not in valid_modes:
        raise ValueError(f"Invalid mode '{mode}'. Available options are: {', '.join(valid_modes)}.")

    # Explicit raise instead of ``assert``: assertions vanish under ``python -O``.
    if not isinstance(fix_na_fio2, bool):
        raise TypeError(f"fix_na_fio2 must be a bool, got {type(fix_na_fio2).__name__}.")

    po2, fio2 = cnc = ["po2", "fio2"]
    res = collect_dots(cnc, interval, *args)
    res = match_fio2(res, match_win, mode, fio2 if fix_na_fio2 else None)

    # A ratio needs both values, and a zero FiO2 would divide by zero.
    usable = res[po2].notna() & res[fio2].notna() & (res[fio2] != 0)

    # Copy the filtered rows so the new column is written to an independent
    # frame rather than to a slice of ``res`` (avoids SettingWithCopy issues).
    res = res[usable].copy()
    res["pafi"] = 100 * res[po2] / res[fio2]

    return rm_cols(res, cnc)


def match_fio2(x: pd.DataFrame, match_win: Union[int, Timedelta], mode: str, fio2 = None) -> pd.DataFrame:
    """
    Bring an oxygenation table and a FiO2 table onto common rows.

    Args:
        x: Two-element sequence of tables as returned by ``collect_dots``
            (first the oxygenation measure, second FiO2). The annotation is
            kept for interface stability although a sequence is expected.
        match_win (int or Timedelta): Window within which values may be
            paired; must exceed the interval reported by ``check_interval``.
        mode (str): 'match_vals' pairs each measurement with the latest
            counterpart at most ``match_win`` old; 'extreme_vals' and
            'fill_gaps' outer-merge the tables and take the windowed minimum
            of the first table's values and maximum of the second's.
        fio2: Name of the FiO2 column whose missing values are set to 21, or
            ``None`` to leave missing values untouched.

    Returns:
        DataFrame: The combined table.
    """
    from functools import reduce

    match_win = as_interval(match_win)

    assert match_win > check_interval(x), "match_win must exceed the data interval"

    # Python sequences are 0-based; the R-style x[1]/x[2] would run off the end.
    first, second = x[0], x[1]

    if mode == "match_vals":
        # Rolling join in both directions, then stack and drop repeated pairs.
        x = pd.concat(
            [
                _roll_join_backward(second, first, match_win),
                _roll_join_backward(first, second, match_win),
            ],
            ignore_index=True,
        ).drop_duplicates()
    else:
        min_cols = _value_columns(first)
        max_cols = _value_columns(second)

        # functools.reduce accepts no keyword arguments, so bind ``how`` here.
        x = reduce(lambda lhs, rhs: pd.merge(lhs, rhs, how="outer"), x)

        if mode == "fill_gaps":
            x = fill_gaps(x)
        else:
            assert mode == "extreme_vals"

        # Worst case per window: lowest oxygenation, highest FiO2. Series.min
        # and Series.max skip NaN and yield NaN when nothing is left.
        # NOTE(review): assumes slide() calls each entry with a pandas Series
        # of the named column -- confirm against pyicu.utils.slide.
        win_expr = {col: (lambda s: s.min()) for col in min_cols}
        win_expr.update({col: (lambda s: s.max()) for col in max_cols})

        x = slide(x, win_expr, before=match_win, full_window=False)

    if fio2 is not None:
        x.loc[x[fio2].isna(), fio2] = 21

    return x


def _value_columns(tbl):
    """Return the columns of ``tbl`` that are not reported by ``meta_vars``."""
    meta = set(meta_vars(tbl))
    return [col for col in tbl.columns if col not in meta]


def _roll_join_backward(probe, source, window):
    """Attach to each ``probe`` row the latest ``source`` row at most ``window`` older."""
    probe_meta = list(meta_vars(probe))
    source_meta = list(meta_vars(source))

    # Pair meta columns by position so differently named columns still line up.
    source = source.rename(columns=dict(zip(source_meta, probe_meta)))

    # NOTE(review): assumes meta_vars lists id columns first and the time
    # index last -- confirm against pyicu.tbl_utils.meta_vars.
    *by, on = probe_meta

    # NOTE(review): pd.merge_asof may hand back a plain DataFrame; re-wrap if
    # downstream code needs the project table type -- confirm.
    return pd.merge_asof(
        probe.sort_values(on),
        source.sort_values(on),
        on=on,
        by=by or None,
        tolerance=window,
        direction="backward",
    )


def vent_ind(*args, match_win: Union[int, Timedelta] = hours(6), min_length: Union[int, Timedelta] = mins(30),
             interval = None) -> pd.DataFrame:
    """
    Determine time windows during which patients are mechanically ventilated.

    Args:
        *args: Concept data forwarded to ``collect_dots`` for the concepts
            ``vent_start``, ``vent_end`` and ``mech_vent``.
        match_win (int or Timedelta): Maximum time difference between start and end events for ventilation.
        min_length (int or Timedelta): Minimal time span between a ventilation start and end time.
        interval: Time interval of the result; defaults to the interval of the loaded data.

    Returns:
        DataFrame: Time windows during which patients are mechanically ventilated.

    Note:
        - If ``mech_vent`` data is present it is used directly and start/end
          events must be absent; the result then has a boolean ``vent_ind``.
        - Otherwise durations are stored in ``vent_dur`` and ``vent_ind`` is True.
        - Currently, no clear distinction between invasive and non-invasive ventilation is made.
    """
    final_int = interval

    cnc = ["vent_start", "vent_end", "mech_vent"]
    # Forward the caller's data; a literal ``...`` would pass Ellipsis itself.
    res = collect_dots(cnc, None, *args)

    interval = check_interval(res)

    if final_int is None:
        final_int = interval

    match_win = as_interval(match_win)
    min_length = as_interval(min_length)

    assert is_interval(final_int) and min_length < match_win and interval < min_length

    starts, ends, mech = res[0], res[1], res[2]

    if mech.shape[0] > 0:
        assert starts.shape[0] == 0 and ends.shape[0] == 0

        # Flag every recorded mech_vent value, then drop the raw column.
        mech = mech.assign(vent_ind=mech["mech_vent"].notna())
        mech = rm_cols(mech, ["mech_vent"])

        return change_interval(mech, final_int, by_ref=True)

    # The R-style ``units(x) = units(y)`` alignment is dropped: it is not
    # valid Python. NOTE(review): presumably unnecessary if as_interval()
    # yields pandas Timedelta values -- confirm.

    cnc = cnc[:-1]
    var = "vent_dur"

    # Keep only events flagged True; ``eq`` treats missing values as False.
    starts = starts[starts["vent_start"].eq(True)]
    ends = ends[ends["vent_end"].eq(True)]

    if ends.shape[0] > 0:
        idx = index_var(starts)
        start_meta = list(meta_vars(starts))
        end_meta = list(meta_vars(ends))
        by = [col for col in start_meta if col != idx]

        # Pair meta columns by position so both tables share join column names.
        ends = ends.rename(columns=dict(zip(end_meta, start_meta)))

        # Remember the true end time, then shift the join key back a minute
        # so an end stamped at a start's own time does not match that start.
        ends = ends.assign(**{var: ends[idx]})
        ends[idx] = ends[idx] - mins(1)

        # For each start take the next end no more than match_win later.
        # NOTE(review): pd.merge_asof may return a plain DataFrame; re-wrap if
        # change_interval() needs the project table type -- confirm.
        res = pd.merge_asof(
            starts.sort_values(idx),
            ends.sort_values(idx),
            on=idx,
            by=by or None,
            tolerance=match_win,
            direction="forward",
        )

        begin, stop = res[idx], res[var]
        # NOTE(review): an unmatched start gets ``start time + match_win`` as
        # duration, as the original expression did; plain ``match_win`` may
        # have been intended -- confirm.
        res[var] = (stop - begin).where(stop.notna(), begin + match_win)
        res = rm_cols(res, cnc)
        res = res[res[var] >= min_length]
    else:
        # No end events at all: every start lasts the full matching window.
        res = rm_cols(starts.assign(**{var: match_win}), ["vent_start"])

    res = change_interval(res, final_int, by_ref=True)
    res = aggregate(res, "max")
    res["vent_ind"] = True

    return as_win_tbl(res, dur_var=var, by_ref=True)

2 changes: 1 addition & 1 deletion src/pyicu/callbacks/sofa.py
Original file line number Diff line number Diff line change
@@ -78,7 +78,7 @@ def sofa_score(*args, worst_val_fun=max_or_na, explicit_wins=False,

return res

# Transformed from ricu (may contain bugs)
# Transformed from ricu
def sofa_cardio(interval=None, **kwargs):
def score_calc(map, dopa, norepi, dobu, epi):
if dopa > 15 or epi > 0.1 or norepi > 0.1:
4 changes: 2 additions & 2 deletions tests/test_rpy2-sofa.py
Original file line number Diff line number Diff line change
@@ -27,7 +27,7 @@
utils.install_packages("mimic.demo", repos = "https://eth-mds.github.io/physionet-demo")

# Load "sofa" concept from ricu
sofa_score_ricu = ricu.load_concepts("sofa_liver", "mimic_demo", verbose = rpy2.robjects.vectors.BoolVector([False]))
sofa_score_ricu = ricu.load_concepts("sofa", "mimic_demo", verbose = rpy2.robjects.vectors.BoolVector([False]))

# Download mimic demo dataset for pyicu
@contextmanager
@@ -60,7 +60,7 @@ def directory(path):
print(mimic_demo.print_available())

concepts = ConceptDict.from_defaults()
sofa_score_pyicu = concepts.load_concepts("sofa_liver", mimic_demo)
sofa_score_pyicu = concepts.load_concepts("sofa", mimic_demo)

# Print results from ricu and pyicu
print(sofa_score_ricu)