diff --git a/src/hyperimpute/plugins/imputers/plugin_EM.py b/src/hyperimpute/plugins/imputers/plugin_EM.py
index 53d0ddb..9956e3d 100644
--- a/src/hyperimpute/plugins/imputers/plugin_EM.py
+++ b/src/hyperimpute/plugins/imputers/plugin_EM.py
@@ -226,6 +226,8 @@ class EMPlugin(base.ImputerPlugin):
     def __init__(
         self,
         random_state: int = 0,
+        maxit: int = 500,
+        convergence_threshold: float = 1e-08,
     ) -> None:
         super().__init__(random_state=random_state)
 
diff --git a/src/hyperimpute/plugins/imputers/plugin_missforest.py b/src/hyperimpute/plugins/imputers/plugin_missforest.py
index cd8e3ea..c798a8c 100644
--- a/src/hyperimpute/plugins/imputers/plugin_missforest.py
+++ b/src/hyperimpute/plugins/imputers/plugin_missforest.py
@@ -72,7 +72,6 @@ def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
         return [
             params.Integer("n_estimators", 10, 50, 10),
             params.Integer("max_iter", 100, 300, 100),
-            params.Integer("max_depth", 1, 3),
         ]
 
     @decorators.benchmark
diff --git a/src/hyperimpute/version.py b/src/hyperimpute/version.py
index db9479b..bfb34d3 100644
--- a/src/hyperimpute/version.py
+++ b/src/hyperimpute/version.py
@@ -1,4 +1,4 @@
-__version__ = "0.1.15"
+__version__ = "0.1.16"
 
 MAJOR_VERSION = ".".join(__version__.split(".")[:-1])
 MINOR_VERSION = __version__.split(".")[-1]
diff --git a/tests/imputers/test_em.py b/tests/imputers/test_em.py
index 5a7a8fd..51a6840 100644
--- a/tests/imputers/test_em.py
+++ b/tests/imputers/test_em.py
@@ -3,8 +3,10 @@
 
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -90,3 +92,30 @@ def test_compare_methods_perf(
     rmse_other = RMSE(x_other.to_numpy(), x, mask)
 
     assert rmse_em < rmse_other
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0
diff --git a/tests/imputers/test_gain.py b/tests/imputers/test_gain.py
index 53294bd..4a91032 100644
--- a/tests/imputers/test_gain.py
+++ b/tests/imputers/test_gain.py
@@ -3,8 +3,10 @@
 
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -90,3 +92,30 @@ def test_compare_methods_perf(
     rmse_other = RMSE(x_other.to_numpy(), x, mask)
 
     assert rmse_gain < rmse_other
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0
diff --git a/tests/imputers/test_hyperimpute.py b/tests/imputers/test_hyperimpute.py
index 9a1bad2..c92810e 100644
--- a/tests/imputers/test_hyperimpute.py
+++ b/tests/imputers/test_hyperimpute.py
@@ -3,8 +3,10 @@
 
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -184,3 +186,30 @@ def test_imputation_order(
     rmse_other = RMSE(x_other.to_numpy(), x, mask)
 
     assert rmse_mf < rmse_other
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0
diff --git a/tests/imputers/test_ice.py b/tests/imputers/test_ice.py
index b96ded7..8ad5278 100644
--- a/tests/imputers/test_ice.py
+++ b/tests/imputers/test_ice.py
@@ -3,8 +3,10 @@
 
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -93,3 +95,30 @@ def test_compare_methods_perf(
     rmse_other = RMSE(x_other.to_numpy(), x, mask)
 
     assert rmse_ice < rmse_other
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0
diff --git a/tests/imputers/test_mice.py b/tests/imputers/test_mice.py
index 344a926..728bb33 100644
--- a/tests/imputers/test_mice.py
+++ b/tests/imputers/test_mice.py
@@ -3,8 +3,10 @@
 
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -100,3 +102,30 @@ def test_compare_methods_perf(
     rmse_other = RMSE(x_other.to_numpy(), x, mask)
 
     assert rmse_ice < rmse_other
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0
diff --git a/tests/imputers/test_miracle.py b/tests/imputers/test_miracle.py
index 5db5a8f..f68b507 100644
--- a/tests/imputers/test_miracle.py
+++ b/tests/imputers/test_miracle.py
@@ -1,7 +1,12 @@
+# stdlib
+from typing import Any
+
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -50,3 +55,30 @@ def test_mean_plugin_fit_transform(test_plugin: ImputerPlugin) -> None:
     )
 
     assert res.isnull().values.any() == False  # noqa
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0
diff --git a/tests/imputers/test_missforest.py b/tests/imputers/test_missforest.py
index ecfc42f..7319cb3 100644
--- a/tests/imputers/test_missforest.py
+++ b/tests/imputers/test_missforest.py
@@ -3,8 +3,10 @@
 
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -44,7 +46,7 @@ def test_missforest_plugin_type(test_plugin: ImputerPlugin) -> None:
 
 @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
 def test_missforest_plugin_hyperparams(test_plugin: ImputerPlugin) -> None:
-    assert len(test_plugin.hyperparameter_space()) == 3
+    assert len(test_plugin.hyperparameter_space()) == 2
 
     assert test_plugin.hyperparameter_space()[0].name == "n_estimators"
 
@@ -91,3 +93,30 @@ def test_compare_methods_perf(
     rmse_other = RMSE(x_other.to_numpy(), x, mask)
 
     assert rmse_mf < rmse_other
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0
diff --git a/tests/imputers/test_miwae.py b/tests/imputers/test_miwae.py
index 51b7632..957b29a 100644
--- a/tests/imputers/test_miwae.py
+++ b/tests/imputers/test_miwae.py
@@ -3,8 +3,10 @@
 
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -90,3 +92,30 @@ def test_compare_methods_perf(
     rmse_other = RMSE(x_other.to_numpy(), x, mask)
 
     assert rmse_ot < rmse_other
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0
diff --git a/tests/imputers/test_sinkhorn.py b/tests/imputers/test_sinkhorn.py
index c7d3da9..e90b5f0 100644
--- a/tests/imputers/test_sinkhorn.py
+++ b/tests/imputers/test_sinkhorn.py
@@ -3,8 +3,10 @@
 
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -90,3 +92,30 @@ def test_compare_methods_perf(
     rmse_other = RMSE(x_other.to_numpy(), x, mask)
 
     assert rmse_ot < rmse_other
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0
diff --git a/tests/imputers/test_sklearn_ice.py b/tests/imputers/test_sklearn_ice.py
index 8999262..a6d84ee 100644
--- a/tests/imputers/test_sklearn_ice.py
+++ b/tests/imputers/test_sklearn_ice.py
@@ -3,8 +3,10 @@
 
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -93,3 +95,30 @@ def test_compare_methods_perf(
     rmse_other = RMSE(x_other.to_numpy(), x, mask)
 
     assert rmse_ice < rmse_other
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0
diff --git a/tests/imputers/test_softimpute.py b/tests/imputers/test_softimpute.py
index d89343c..d1c1d8e 100644
--- a/tests/imputers/test_softimpute.py
+++ b/tests/imputers/test_softimpute.py
@@ -3,8 +3,10 @@
 
 # third party
 import numpy as np
+import optuna
 import pandas as pd
 import pytest
+from sklearn.datasets import load_iris
 
 # hyperimpute absolute
 from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -92,3 +94,30 @@ def test_compare_methods_perf(
     rmse_other = RMSE(x_other.to_numpy(), x, mask)
 
     assert rmse_soft < rmse_other
+
+
+def test_param_search() -> None:
+    if len(plugin.hyperparameter_space()) == 0:
+        return
+
+    X, _ = load_iris(return_X_y=True)
+    orig_val = X[0, 0]
+    X[0, 0] = np.nan
+
+    def evaluate_args(**kwargs: Any) -> float:
+        X_imp = plugin(**kwargs).fit_transform(X.copy()).values
+
+        return np.abs(orig_val - X_imp[0, 0])
+
+    def objective(trial: optuna.Trial) -> float:
+        args = plugin.sample_hyperparameters(trial)
+        return evaluate_args(**args)
+
+    study = optuna.create_study(
+        load_if_exists=True,
+        directions=["minimize"],
+        study_name=f"test_param_search_{plugin.name()}",
+    )
+    study.optimize(objective, n_trials=10, timeout=60)
+
+    assert len(study.trials) > 0