From 3eec76a6e83c4d2aeceb0969bc2948c87e559fc1 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 7 Oct 2023 13:31:43 +0200
Subject: [PATCH] Update Stacking example to work with Py3.9 and sklearn 1.3
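
Bump the example's scikit-learn pin from 0.22.1 to 1.3.1 and update the
exec script and framework config to match:

- replace _linear_params with separate _sgdclassifier_params and
  _sgdregressor_params so the SGD base learners can be configured
  independently; the classifier now uses loss: log_loss, the name
  scikit-learn 1.3 uses for logistic loss.
- stop passing random_state and _final_params to the regression
  final_estimator, since LinearRegression accepts neither random_state
  nor max_iter.
- time the predict call separately and report it as predict_duration.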
---
examples/custom/extensions/Stacking/exec.py | 15 ++++++++-------
.../custom/extensions/Stacking/requirements.txt | 2 +-
examples/custom/frameworks.yaml | 6 +++---
3 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/examples/custom/extensions/Stacking/exec.py b/examples/custom/extensions/Stacking/exec.py
index d8c80879d..47321feb6 100644
--- a/examples/custom/extensions/Stacking/exec.py
+++ b/examples/custom/extensions/Stacking/exec.py
@@ -30,18 +30,17 @@ def run(dataset, config):
training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
n_jobs = config.framework_params.get('_n_jobs', config.cores) # useful to disable multicore, regardless of the dataset config
- estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'linear', 'svc', 'final']}
+ estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'sgdclassifier', 'sgdregressor', 'svc', 'final']}
log.info("Running Sklearn Stacking Ensemble with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs))
log.warning("We completely ignore the requirement to stay within the time limit.")
log.warning("We completely ignore the advice to optimize towards metric: {}.".format(config.metric))
-
if is_classification:
estimator = StackingClassifier(
estimators=[('rf', RandomForestClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
('gbm', GradientBoostingClassifier(random_state=config.seed, **estimators_params['gbm'])),
- ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['linear'])),
+ ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['sgdclassifier'])),
# ('svc', LinearSVC(random_state=config.seed, **estimators_params['svc']))
],
# final_estimator=SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
@@ -54,11 +53,11 @@ def run(dataset, config):
estimator = StackingRegressor(
estimators=[('rf', RandomForestRegressor(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
('gbm', GradientBoostingRegressor(random_state=config.seed, **estimators_params['gbm'])),
- ('linear', SGDRegressor(random_state=config.seed, **estimators_params['linear'])),
+ ('linear', SGDRegressor(random_state=config.seed, **estimators_params['sgdregressor'])),
('svc', LinearSVR(random_state=config.seed, **estimators_params['svc']))
],
# final_estimator=SGDRegressor(random_state=config.seed, **estimators_params['final']),
- final_estimator=LinearRegression(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
+ final_estimator=LinearRegression(n_jobs=n_jobs),
n_jobs=n_jobs,
**training_params
)
@@ -66,7 +65,8 @@ def run(dataset, config):
with Timer() as training:
estimator.fit(X_train, y_train)
- predictions = estimator.predict(X_test)
+ with Timer() as predict:
+ predictions = estimator.predict(X_test)
probabilities = estimator.predict_proba(X_test) if is_classification else None
return result(output_file=config.output_predictions_file,
@@ -75,7 +75,8 @@ def run(dataset, config):
probabilities=probabilities,
target_is_encoded=is_classification,
models_count=len(estimator.estimators_) + 1,
- training_duration=training.duration)
+ training_duration=training.duration,
+ predict_duration=predict.duration)
if __name__ == '__main__':
diff --git a/examples/custom/extensions/Stacking/requirements.txt b/examples/custom/extensions/Stacking/requirements.txt
index d2afe9e80..73f0cadcd 100644
--- a/examples/custom/extensions/Stacking/requirements.txt
+++ b/examples/custom/extensions/Stacking/requirements.txt
@@ -1 +1 @@
-scikit-learn==0.22.1
+scikit-learn==1.3.1
diff --git a/examples/custom/frameworks.yaml b/examples/custom/frameworks.yaml
index a68884811..e711bcbbb 100644
--- a/examples/custom/frameworks.yaml
+++ b/examples/custom/frameworks.yaml
@@ -9,15 +9,15 @@ GradientBoosting:
Stacking:
module: extensions.Stacking
- version: '0.22.1'
+ version: '1.3.1'
project: https://scikit-learn.org/stable/modules/ensemble.html#stacking
params:
_rf_params: {n_estimators: 200}
_gbm_params: {n_estimators: 200}
- _linear_params: {penalty: elasticnet, loss: log}
+ _sgdclassifier_params: {penalty: elasticnet, loss: log_loss}
+ _sgdregressor_params: {penalty: elasticnet}
# _svc_params: {tol: 1e-3, max_iter: 1e5}
# _final_params: {penalty: elasticnet, loss: log} # sgd linear
- _final_params: {max_iter: 1000} # logistic/linear
H2OAutoML_nightly:
module: frameworks.H2OAutoML