Add 2 new concepts, pafi and vent_ind

prockenschaub · rvandewater · Jun 5, 2023 · Jun 6, 2023 · Jun 6, 2023 · Jun 11, 2023
commit 52ec0b762f95d8a464770948948685441d7bf1d5
diff --git a/src/pyicu/callbacks/concept.py b/src/pyicu/callbacks/concept.py
@@ -8,6 +8,13 @@
 from ..interval import hours
 from ..utils import expand
 from .misc import collect_concepts
+from typing import Union
+from pandas._libs.tslibs.timedeltas import Timedelta
+from pandas.api.types import is_datetime64_any_dtype
+from pyicu.utils import slide
+from pyicu.tbl_utils import meta_vars
+from pyicu.container.table import rm_cols
+
 
 
 def gcs(
@@ -134,10 +141,6 @@ def collect_dots(concepts, interval=None, *args, merge_dat=False):
 
     return res
 
-import pandas as pd
-from pandas.api.types import is_datetime64_any_dtype
-from pandas.api.types import is_timedelta64_dtype
-
 def check_ival(x, iv):
     return isinstance(x, pd.DataFrame) and (not is_datetime64_any_dtype(x) or has_interval(x, iv))
 
@@ -175,3 +178,157 @@ def interval(dat):
 
     return ival
 
+def pafi(*args, match_win: Union[int, Timedelta] = Timedelta(hours=2),
+         mode: str = "match_vals", fix_na_fio2: bool = True, interval = None) -> pd.DataFrame:  # NOTE(review): this commit removes the mid-file `import pandas as pd`; confirm `pd` is still imported above
+    """
+    Compute the PaO2/FiO2 ratio (Horowitz index) from the "po2" and "fio2" concepts.
+
+    Args:
+        *args: Forwarded to collect_dots after the concept names and `interval`.
+        match_win (int or Timedelta): Maximum time difference between two measurements for calculating their ratio.
+        mode (str): Calculation mode. Options: 'match_vals', 'extreme_vals', 'fill_gaps'; any other value raises ValueError.
+        fix_na_fio2 (bool): Impute missing FiO2 values with 21 (asserted to be a bool).
+        interval: Passed to collect_dots for every mode, not only 'fill_gaps'.
+
+    Returns:
+        DataFrame: Rows with non-missing PaO2 and non-missing, non-zero FiO2, carrying a "pafi" column (100 * po2 / fio2).
+
+    Note:
+        - 'match_vals' allows a time difference of at most 'match_win' between two measurements for calculating their ratio.
+        - 'extreme_vals' uses the worst PaO2 and FiO2 values within the time window specified by 'match_win'.
+        - 'fill_gaps' is a variation of 'extreme_vals' that evaluates ratios at every time point specified by 'interval',
+          rather than only when a measurement for either PaO2 or FiO2 is available.
+        - If 'fix_na_fio2' is True, missing FiO2 values are imputed with 21, the percentage (by volume) of oxygen in air.
+    """
+    valid_modes = {  # identity mapping; effectively the set of accepted mode names
+        "match_vals": "match_vals",
+        "extreme_vals": "extreme_vals",
+        "fill_gaps": "fill_gaps"
+    }
+
+    if mode not in valid_modes:
+        raise ValueError(f"Invalid mode '{mode}'. Available options are: {', '.join(valid_modes)}.")
+
+    mode = valid_modes[mode]  # no-op: every key maps to itself
+
+    assert isinstance(fix_na_fio2, bool)  # NOTE(review): asserts are stripped under `python -O`
+
+    cnc = ["po2", "fio2"]
+    res = collect_dots(cnc, interval, *args)
+    res = match_fio2(res, match_win, mode, cnc[1] if fix_na_fio2 else None)  # passing "fio2" enables imputation in match_fio2; None disables it
+
+    res = res[(~res[cnc[0]].isna()) & (~res[cnc[1]].isna()) & (res[cnc[1]] != 0)]  # fio2 != 0 avoids dividing by zero on the next line
+    res["pafi"] = 100 * res[cnc[0]] / res[cnc[1]]  # factor 100: presumably fio2 is expressed in percent — confirm
+    res = rm_cols(res, cnc)  # presumably drops the "po2"/"fio2" input columns — confirm rm_cols semantics
+
+    return res
+
+
+def match_fio2(x: pd.DataFrame, match_win: Union[int, Timedelta], mode: str, fio2 = None) -> pd.DataFrame:  # combines the data collected by pafi according to `mode`; `fio2` names the column to impute with 21 (None = no imputation)
+    match_win = as_interval(match_win)  # NOTE(review): as_interval / check_interval are not imported in the hunks shown — confirm
+    # NOTE(review): x is annotated as a DataFrame but is indexed like a list of tables (x[1], x[2]) below — confirm the real type
+    assert match_win > check_interval(x)  # the matching window must exceed check_interval(x), presumably the data's time step
+    # 'match_vals' joins each table onto the other; the other modes merge first and then aggregate over a window
+    if mode == "match_vals":
+        on12 = [f"{meta_vars(x[1])}=={meta_vars(x[2])}"]  # NOTE(review): indices 1 and 2 look 1-based; a two-element Python list has no x[2]
+        on21 = [f"{meta_vars(x[2])}=={meta_vars(x[1])}"]
+
+        x = merge(  # NOTE(review): keyword arguments inside a subscript (`.loc[..., on=..., roll=...]`) are a SyntaxError in Python
+            x[1].loc[x[2], on=on12, roll=match_win],
+            x[2].loc[x[1], on=on21, roll=match_win]
+        )
+        x = x.drop_duplicates()
+    else:
+        x = reduce(merge, x, how="outer")  # NOTE(review): functools.reduce accepts no keyword arguments — confirm which reduce/merge are meant
+
+        if mode == "fill_gaps":
+            x = fill_gaps(x)
+        else:
+            assert mode == "extreme_vals"  # the only other mode this branch accepts
+
+        win_expr = {  # NOTE(review): key "o2sat" differs from the "po2" column pafi reads afterwards — confirm how slide names its outputs
+            "o2sat": lambda x: x.min_or_na(),
+            "fio2": lambda x: x.max_or_na()
+        }
+
+        x = slide(x, win_expr, before=match_win, full_window=False)  # presumably aggregates over a window reaching back `match_win` — confirm
+
+    if fio2 is not None:
+        x.loc[x[fio2].isna(), fio2] = 21  # impute missing values in the named column with 21
+
+    return x
+
+
+def vent_ind(*args, match_win: Union[int, Timedelta] = hours(6), min_length: Union[int, Timedelta] = mins(30),
+              interval = None) -> pd.DataFrame:  # NOTE(review): `mins` is not imported in the hunks shown — confirm it is in scope
+    """
+    Determine time windows during which patients are mechanically ventilated.
+
+    Args:
+        *args: Accepted but never forwarded; collect_dots is called with a literal Ellipsis instead (see NOTE below).
+        match_win (int or Timedelta): Maximum time difference between start and end events for ventilation.
+        min_length (int or Timedelta): Minimal time span between a ventilation start and end time; asserted to lie between the data interval and match_win.
+        interval: Interval handed to change_interval for the result; when None, check_interval of the collected data is used.
+
+    Returns:
+        DataFrame: Time windows during which patients are mechanically ventilated.
+
+    Note:
+        - On the start/end path, durations are stored in 'vent_dur' (passed to as_win_tbl as dur_var) and 'vent_ind' is set to True.
+        - If 'mech_vent' has rows, the function returns early with 'vent_ind' = non-missing 'mech_vent' and does not call as_win_tbl.
+        - Currently, no clear distinction between invasive and non-invasive ventilation is made.
+    """
+    subset_true = lambda x, col: x[x[col].is_true()]  # NOTE(review): `is_true` is not a pandas Series method — confirm the column type
+    calc_dur = lambda x, y: x + match_win if y.isna() else y - x  # NOTE(review): `y.isna()` in a scalar `if` is ambiguous if y is a Series; it is called with whole columns below
+
+    final_int = interval  # keep the caller's value; `interval` is rebound to the data's interval below
+
+    cnc = ["vent_start", "vent_end", "mech_vent"]
+    res = collect_dots(cnc, None, ...)  # NOTE(review): `...` is the Ellipsis literal, so *args is never forwarded — probably meant `*args`
+
+    interval = check_interval(res)
+
+    if final_int is None:
+        final_int = interval
+
+    match_win = as_interval(match_win)
+    min_length = as_interval(min_length)
+
+    assert is_interval(final_int) and min_length < match_win and interval < min_length  # requires interval < min_length < match_win
+
+    if res[2].shape[0] > 0:  # 'mech_vent' rows exist: use them directly and require the start/end tables to be empty
+        assert res[0].shape[0] == 0 and res[1].shape[0] == 0
+
+        res[2][["vent_ind", "mech_vent"]] = [~res[2]["mech_vent"].isna(), None]  # NOTE(review): assigning None likely does not drop the 'mech_vent' column — confirm
+
+        res = change_interval(res[2], final_int, by_ref=True)
+
+        return res
+
+    assert res[2].shape[0] == 0
+
+    units(match_win) = units(interval)  # NOTE(review): assigning to a function call is a SyntaxError in Python (R replacement-function idiom)
+    units(min_length) = units(interval)
+
+    cnc = cnc[:-1]  # drop "mech_vent"; only start/end events remain
+    res = [subset_true(res[i], cnc[i]) for i in range(len(res)-1)]  # res[0]: vent_start rows, res[1]: vent_end rows
+    var = "vent_dur"
+
+    if res[1].shape[0] > 0:
+        idx_vars = [chr_ply(res, index_var)[i] for i in range(len(res))]
+        res[1][[var, idx_vars[1]]] = [res[1][idx_vars[1]], res[1][idx_vars[1]] - mins(1)]  # copy the end table's index into vent_dur, then shift that index back by mins(1)
+
+        jon = [chr_ply(do.call(map, ["c", lapply(reversed(res), meta_vars)]), paste, collapse=" == ")]  # NOTE(review): do.call / lapply / paste are R idioms with no Python definition visible here
+
+        res = res[1].merge(res[0], roll=-match_win, on=jon)  # NOTE(review): pandas' DataFrame.merge has no `roll` parameter — confirm res[1] is a project table type
+        res[[var] + cnc] = [calc_dur(res[idx_vars[1]], res[var]), None, None]  # vent_dur from calc_dur; None assigned to the vent_start/vent_end columns
+        res = res[res[var] >= min_length]  # keep windows of at least min_length
+    else:
+        res = res[0][[var, "vent_start"]].assign(vent_start=match_win)  # NOTE(review): selects a 'vent_dur' column nothing visible creates and overwrites vent_start with match_win — confirm intent
+
+    res = change_interval(res, final_int, by_ref=True)
+    res = aggregate(res, "max")
+    res["vent_ind"] = True
+
+    return as_win_tbl(res, dur_var=var, by_ref=True)
+
diff --git a/src/pyicu/callbacks/sofa.py b/src/pyicu/callbacks/sofa.py
@@ -78,7 +78,7 @@ def sofa_score(*args, worst_val_fun=max_or_na, explicit_wins=False,
 
     return res
 
-# Transformed from ricu (may contain bugs)
+# Transformed from ricu
 def sofa_cardio(interval=None, **kwargs):
     def score_calc(map, dopa, norepi, dobu, epi):
         if dopa > 15 or epi > 0.1 or norepi > 0.1:

diff --git a/tests/test_rpy2-sofa.py b/tests/test_rpy2-sofa.py
@@ -27,7 +27,7 @@
 utils.install_packages("mimic.demo", repos = "https://eth-mds.github.io/physionet-demo")
 
 # Load "sofa" concept from ricu
-sofa_score_ricu = ricu.load_concepts("sofa_liver", "mimic_demo", verbose = rpy2.robjects.vectors.BoolVector([False]))
+sofa_score_ricu = ricu.load_concepts("sofa", "mimic_demo", verbose = rpy2.robjects.vectors.BoolVector([False]))
 
 # Download mimic demo dataset for pyicu
 @contextmanager
@@ -60,7 +60,7 @@ def directory(path):
 print(mimic_demo.print_available())
 
 concepts = ConceptDict.from_defaults()
-sofa_score_pyicu = concepts.load_concepts("sofa_liver", mimic_demo)
+sofa_score_pyicu = concepts.load_concepts("sofa", mimic_demo)
 
 # Print results from ricu and pyicu
 print(sofa_score_ricu)