-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathplugin_ice.py
89 lines (71 loc) · 2.94 KB
/
plugin_ice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# stdlib
from typing import Any, List
# third party
import pandas as pd
# hyperimpute absolute
import hyperimpute.plugins.core.params as params
import hyperimpute.plugins.imputers.base as base
from hyperimpute.plugins.imputers.plugin_hyperimpute import plugin as base_model
class IterativeChainedEquationsPlugin(base.ImputerPlugin):
    """Imputation plugin that completes missing values with Iterative Chained Equations (ICE).

    Method:
        Chained-equation methods model each feature with missing values as a
        function of the other features, in a round-robin fashion. This plugin
        delegates the work to the HyperImpute plugin, configured with
        "logistic_regression" as its only classifier seed and
        "linear_regression" as its only regression seed.

    Args:
        max_iter: int, default=1000
            forwarded to the underlying model as `n_inner_iter`.
        initial_strategy: int, default=0
            forwarded to the underlying model as `baseline_imputer`. The
            hyperparameter space samples it as an integer between 0 and
            len(initial_strategy_vals) - 1.
        imputation_order: int, default=0
            forwarded to the underlying model as `imputation_order`. The
            hyperparameter space samples it as an integer between 0 and
            len(imputation_order_vals) - 1.
        random_state: int, default=0
            seed handed to both the base plugin and the underlying model.

    Example:
        >>> import numpy as np
        >>> from hyperimpute.plugins.imputers import Imputers
        >>> plugin = Imputers().get("ice")
        >>> plugin.fit_transform([[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [1, 2, 2, 1], [2, 2, 2, 2]])

    Reference: "mice: Multivariate Imputation by Chained Equations in R", Stef van Buuren, Karin Groothuis-Oudshoorn
    """

    def __init__(
        self,
        max_iter: int = 1000,
        initial_strategy: int = 0,
        imputation_order: int = 0,
        random_state: int = 0,
    ) -> None:
        super().__init__(random_state=random_state)

        # Keep the constructor arguments available on the instance.
        self.max_iter = max_iter
        self.initial_strategy = initial_strategy
        self.imputation_order = imputation_order
        self.random_state = random_state

        # A single fixed learner per task type is offered to the underlying model.
        classifier_seeds = ["logistic_regression"]
        regression_seeds = ["linear_regression"]

        self._model = base_model(
            random_state=random_state,
            n_inner_iter=max_iter,
            baseline_imputer=initial_strategy,
            imputation_order=imputation_order,
            classifier_seed=classifier_seeds,
            regression_seed=regression_seeds,
            # NOTE(review): presumably the distinct-value cutoff used to treat
            # a column as categorical — confirm against plugin_hyperimpute.
            class_threshold=5,
        )

    @staticmethod
    def name() -> str:
        """Return the identifier of this imputer, "ice"."""
        return "ice"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Return the tunable hyperparameters of this plugin.

        The list holds, in order, integer parameters for "max_iter",
        "initial_strategy" and "imputation_order". The positional and keyword
        arguments are accepted but not used.
        """
        # NOTE(review): (100, 1000, 100) presumably means low, high, step —
        # confirm against params.Integer.
        space: List[params.Params] = [params.Integer("max_iter", 100, 1000, 100)]

        # Both remaining parameters are indices into a list of choices exposed
        # by the underlying model class, hence the inclusive upper bound len - 1.
        indexed_choices = (
            ("initial_strategy", base_model.initial_strategy_vals),
            ("imputation_order", base_model.imputation_order_vals),
        )
        for param_name, choices in indexed_choices:
            space.append(params.Integer(param_name, 0, len(choices) - 1))

        return space

    def _fit(
        self, X: pd.DataFrame, *args: Any, **kwargs: Any
    ) -> "IterativeChainedEquationsPlugin":
        """Fit the underlying model on X, forwarding any extra arguments.

        Returns:
            This plugin instance.
        """
        self._model.fit(X, *args, **kwargs)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Return whatever the underlying model's `transform` produces for X."""
        imputed = self._model.transform(X)
        return imputed
# Module-level alias for the imputer class defined in this file.
# NOTE(review): presumably the name the plugin loader looks up (this file
# imports `plugin` from plugin_hyperimpute in the same way) — confirm.
plugin = IterativeChainedEquationsPlugin