Hyperparameter sampling fixes (#37)
* sampling bug fixes

* cleanup
bcebere authored Feb 10, 2023
1 parent 825635d commit c62e72f
Showing 14 changed files with 326 additions and 3 deletions.
2 changes: 2 additions & 0 deletions src/hyperimpute/plugins/imputers/plugin_EM.py
@@ -226,6 +226,8 @@ class EMPlugin(base.ImputerPlugin):
def __init__(
self,
random_state: int = 0,
maxit: int = 500,
convergence_threshold: float = 1e-08,
) -> None:
super().__init__(random_state=random_state)

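As an illustration only (not part of this commit), the two new EM arguments can be tuned with Optuna in the same style as the test_param_search tests added further down. The registry call Imputers().get("EM", ...), the plugin name "EM", and the search ranges below are assumptions based on the library's usual usage pattern, not code from this change.

# Hedged sketch, not from this commit: searching the new EM constructor
# arguments with Optuna. Imputers().get("EM") and the ranges are assumed.
import numpy as np
import optuna
from sklearn.datasets import load_iris

from hyperimpute.plugins.imputers import Imputers

X, _ = load_iris(return_X_y=True)
orig_val = X[0, 0]
X[0, 0] = np.nan  # hide one value and score how well it is recovered


def objective(trial: optuna.Trial) -> float:
    maxit = trial.suggest_int("maxit", 100, 1000, step=100)
    threshold = trial.suggest_float("convergence_threshold", 1e-8, 1e-2, log=True)
    imputer = Imputers().get("EM", maxit=maxit, convergence_threshold=threshold)
    X_imp = imputer.fit_transform(X.copy()).values
    return float(np.abs(orig_val - X_imp[0, 0]))


study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10, timeout=60)
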
1 change: 0 additions & 1 deletion src/hyperimpute/plugins/imputers/plugin_missforest.py
@@ -72,7 +72,6 @@ def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
return [
params.Integer("n_estimators", 10, 50, 10),
params.Integer("max_iter", 100, 300, 100),
params.Integer("max_depth", 1, 3),
]

@decorators.benchmark
2 changes: 1 addition & 1 deletion src/hyperimpute/version.py
@@ -1,4 +1,4 @@
__version__ = "0.1.15"
__version__ = "0.1.16"

MAJOR_VERSION = ".".join(__version__.split(".")[:-1])
MINOR_VERSION = __version__.split(".")[-1]
29 changes: 29 additions & 0 deletions tests/imputers/test_em.py
@@ -3,8 +3,10 @@

# third party
import numpy as np
import optuna
import pandas as pd
import pytest
from sklearn.datasets import load_iris

# hyperimpute absolute
from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -90,3 +92,30 @@ def test_compare_methods_perf(
rmse_other = RMSE(x_other.to_numpy(), x, mask)

assert rmse_em < rmse_other


def test_param_search() -> None:
if len(plugin.hyperparameter_space()) == 0:
return

X, _ = load_iris(return_X_y=True)
orig_val = X[0, 0]
X[0, 0] = np.nan

def evaluate_args(**kwargs: Any) -> float:
X_imp = plugin(**kwargs).fit_transform(X.copy()).values

return np.abs(orig_val - X_imp[0, 0])

def objective(trial: optuna.Trial) -> float:
args = plugin.sample_hyperparameters(trial)
return evaluate_args(**args)

study = optuna.create_study(
load_if_exists=True,
directions=["minimize"],
study_name=f"test_param_search_{plugin.name()}",
)
study.optimize(objective, n_trials=10, timeout=60)

assert len(study.trials) > 0
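For context on what test_param_search exercises: sample_hyperparameters walks the plugin's hyperparameter_space() and asks the Optuna trial for one suggestion per declared parameter, returning a dict that is splatted into the plugin constructor. The snippet below is a self-contained sketch of that mapping under that assumption; IntegerParam is a stand-in for illustration, not hyperimpute's params.Integer, and the library's actual implementation may differ.

# Self-contained sketch of the Params -> Optuna mapping exercised above.
# IntegerParam is a stand-in, not hyperimpute's params.Integer.
from dataclasses import dataclass
from typing import Any, Dict, List

import optuna


@dataclass
class IntegerParam:
    name: str
    low: int
    high: int
    step: int = 1

    def sample(self, trial: optuna.Trial) -> int:
        return trial.suggest_int(self.name, self.low, self.high, step=self.step)


def sample_hyperparameters(
    space: List[IntegerParam], trial: optuna.Trial
) -> Dict[str, Any]:
    # One suggestion per declared hyperparameter, keyed by name, so the
    # result can be passed straight to the plugin constructor as **kwargs.
    return {p.name: p.sample(trial) for p in space}


def demo_objective(trial: optuna.Trial) -> float:
    space = [
        IntegerParam("n_estimators", 10, 50, 10),
        IntegerParam("max_iter", 100, 300, 100),
    ]
    args = sample_hyperparameters(space, trial)
    return float(sum(args.values()))  # placeholder score for the sketch


study = optuna.create_study(direction="minimize")
study.optimize(demo_objective, n_trials=5)
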
29 changes: 29 additions & 0 deletions tests/imputers/test_gain.py
@@ -3,8 +3,10 @@

# third party
import numpy as np
import optuna
import pandas as pd
import pytest
from sklearn.datasets import load_iris

# hyperimpute absolute
from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -90,3 +92,30 @@ def test_compare_methods_perf(
rmse_other = RMSE(x_other.to_numpy(), x, mask)

assert rmse_gain < rmse_other


def test_param_search() -> None:
if len(plugin.hyperparameter_space()) == 0:
return

X, _ = load_iris(return_X_y=True)
orig_val = X[0, 0]
X[0, 0] = np.nan

def evaluate_args(**kwargs: Any) -> float:
X_imp = plugin(**kwargs).fit_transform(X.copy()).values

return np.abs(orig_val - X_imp[0, 0])

def objective(trial: optuna.Trial) -> float:
args = plugin.sample_hyperparameters(trial)
return evaluate_args(**args)

study = optuna.create_study(
load_if_exists=True,
directions=["minimize"],
study_name=f"test_param_search_{plugin.name()}",
)
study.optimize(objective, n_trials=10, timeout=60)

assert len(study.trials) > 0
29 changes: 29 additions & 0 deletions tests/imputers/test_hyperimpute.py
@@ -3,8 +3,10 @@

# third party
import numpy as np
import optuna
import pandas as pd
import pytest
from sklearn.datasets import load_iris

# hyperimpute absolute
from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -184,3 +186,30 @@ def test_imputation_order(
rmse_other = RMSE(x_other.to_numpy(), x, mask)

assert rmse_mf < rmse_other


def test_param_search() -> None:
if len(plugin.hyperparameter_space()) == 0:
return

X, _ = load_iris(return_X_y=True)
orig_val = X[0, 0]
X[0, 0] = np.nan

def evaluate_args(**kwargs: Any) -> float:
X_imp = plugin(**kwargs).fit_transform(X.copy()).values

return np.abs(orig_val - X_imp[0, 0])

def objective(trial: optuna.Trial) -> float:
args = plugin.sample_hyperparameters(trial)
return evaluate_args(**args)

study = optuna.create_study(
load_if_exists=True,
directions=["minimize"],
study_name=f"test_param_search_{plugin.name()}",
)
study.optimize(objective, n_trials=10, timeout=60)

assert len(study.trials) > 0
29 changes: 29 additions & 0 deletions tests/imputers/test_ice.py
@@ -3,8 +3,10 @@

# third party
import numpy as np
import optuna
import pandas as pd
import pytest
from sklearn.datasets import load_iris

# hyperimpute absolute
from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -93,3 +95,30 @@ def test_compare_methods_perf(
rmse_other = RMSE(x_other.to_numpy(), x, mask)

assert rmse_ice < rmse_other


def test_param_search() -> None:
if len(plugin.hyperparameter_space()) == 0:
return

X, _ = load_iris(return_X_y=True)
orig_val = X[0, 0]
X[0, 0] = np.nan

def evaluate_args(**kwargs: Any) -> float:
X_imp = plugin(**kwargs).fit_transform(X.copy()).values

return np.abs(orig_val - X_imp[0, 0])

def objective(trial: optuna.Trial) -> float:
args = plugin.sample_hyperparameters(trial)
return evaluate_args(**args)

study = optuna.create_study(
load_if_exists=True,
directions=["minimize"],
study_name=f"test_param_search_{plugin.name()}",
)
study.optimize(objective, n_trials=10, timeout=60)

assert len(study.trials) > 0
29 changes: 29 additions & 0 deletions tests/imputers/test_mice.py
@@ -3,8 +3,10 @@

# third party
import numpy as np
import optuna
import pandas as pd
import pytest
from sklearn.datasets import load_iris

# hyperimpute absolute
from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -100,3 +102,30 @@ def test_compare_methods_perf(
rmse_other = RMSE(x_other.to_numpy(), x, mask)

assert rmse_ice < rmse_other


def test_param_search() -> None:
if len(plugin.hyperparameter_space()) == 0:
return

X, _ = load_iris(return_X_y=True)
orig_val = X[0, 0]
X[0, 0] = np.nan

def evaluate_args(**kwargs: Any) -> float:
X_imp = plugin(**kwargs).fit_transform(X.copy()).values

return np.abs(orig_val - X_imp[0, 0])

def objective(trial: optuna.Trial) -> float:
args = plugin.sample_hyperparameters(trial)
return evaluate_args(**args)

study = optuna.create_study(
load_if_exists=True,
directions=["minimize"],
study_name=f"test_param_search_{plugin.name()}",
)
study.optimize(objective, n_trials=10, timeout=60)

assert len(study.trials) > 0
32 changes: 32 additions & 0 deletions tests/imputers/test_miracle.py
@@ -1,7 +1,12 @@
# stdlib
from typing import Any

# third party
import numpy as np
import optuna
import pandas as pd
import pytest
from sklearn.datasets import load_iris

# hyperimpute absolute
from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -50,3 +55,30 @@ def test_mean_plugin_fit_transform(test_plugin: ImputerPlugin) -> None:
)

assert res.isnull().values.any() == False # noqa


def test_param_search() -> None:
if len(plugin.hyperparameter_space()) == 0:
return

X, _ = load_iris(return_X_y=True)
orig_val = X[0, 0]
X[0, 0] = np.nan

def evaluate_args(**kwargs: Any) -> float:
X_imp = plugin(**kwargs).fit_transform(X.copy()).values

return np.abs(orig_val - X_imp[0, 0])

def objective(trial: optuna.Trial) -> float:
args = plugin.sample_hyperparameters(trial)
return evaluate_args(**args)

study = optuna.create_study(
load_if_exists=True,
directions=["minimize"],
study_name=f"test_param_search_{plugin.name()}",
)
study.optimize(objective, n_trials=10, timeout=60)

assert len(study.trials) > 0
31 changes: 30 additions & 1 deletion tests/imputers/test_missforest.py
@@ -3,8 +3,10 @@

# third party
import numpy as np
import optuna
import pandas as pd
import pytest
from sklearn.datasets import load_iris

# hyperimpute absolute
from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -44,7 +46,7 @@ def test_missforest_plugin_type(test_plugin: ImputerPlugin) -> None:

@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_missforest_plugin_hyperparams(test_plugin: ImputerPlugin) -> None:
assert len(test_plugin.hyperparameter_space()) == 3
assert len(test_plugin.hyperparameter_space()) == 2
assert test_plugin.hyperparameter_space()[0].name == "n_estimators"


@@ -91,3 +93,30 @@ def test_compare_methods_perf(
rmse_other = RMSE(x_other.to_numpy(), x, mask)

assert rmse_mf < rmse_other


def test_param_search() -> None:
if len(plugin.hyperparameter_space()) == 0:
return

X, _ = load_iris(return_X_y=True)
orig_val = X[0, 0]
X[0, 0] = np.nan

def evaluate_args(**kwargs: Any) -> float:
X_imp = plugin(**kwargs).fit_transform(X.copy()).values

return np.abs(orig_val - X_imp[0, 0])

def objective(trial: optuna.Trial) -> float:
args = plugin.sample_hyperparameters(trial)
return evaluate_args(**args)

study = optuna.create_study(
load_if_exists=True,
directions=["minimize"],
study_name=f"test_param_search_{plugin.name()}",
)
study.optimize(objective, n_trials=10, timeout=60)

assert len(study.trials) > 0
29 changes: 29 additions & 0 deletions tests/imputers/test_miwae.py
@@ -3,8 +3,10 @@

# third party
import numpy as np
import optuna
import pandas as pd
import pytest
from sklearn.datasets import load_iris

# hyperimpute absolute
from hyperimpute.plugins.imputers import ImputerPlugin, Imputers
@@ -90,3 +92,30 @@ def test_compare_methods_perf(
rmse_other = RMSE(x_other.to_numpy(), x, mask)

assert rmse_ot < rmse_other


def test_param_search() -> None:
if len(plugin.hyperparameter_space()) == 0:
return

X, _ = load_iris(return_X_y=True)
orig_val = X[0, 0]
X[0, 0] = np.nan

def evaluate_args(**kwargs: Any) -> float:
X_imp = plugin(**kwargs).fit_transform(X.copy()).values

return np.abs(orig_val - X_imp[0, 0])

def objective(trial: optuna.Trial) -> float:
args = plugin.sample_hyperparameters(trial)
return evaluate_args(**args)

study = optuna.create_study(
load_if_exists=True,
directions=["minimize"],
study_name=f"test_param_search_{plugin.name()}",
)
study.optimize(objective, n_trials=10, timeout=60)

assert len(study.trials) > 0