From 3eec76a6e83c4d2aeceb0969bc2948c87e559fc1 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 7 Oct 2023 13:31:43 +0200
Subject: [PATCH] Update Stacking example to work with Py3.9 and sklearn 1.3

---
 examples/custom/extensions/Stacking/exec.py       | 15 ++++++++-------
 .../custom/extensions/Stacking/requirements.txt   |  2 +-
 examples/custom/frameworks.yaml                   |  6 +++---
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/examples/custom/extensions/Stacking/exec.py b/examples/custom/extensions/Stacking/exec.py
index d8c80879d..47321feb6 100644
--- a/examples/custom/extensions/Stacking/exec.py
+++ b/examples/custom/extensions/Stacking/exec.py
@@ -30,18 +30,17 @@ def run(dataset, config):
 
     training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
     n_jobs = config.framework_params.get('_n_jobs', config.cores)  # useful to disable multicore, regardless of the dataset config
-    estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'linear', 'svc', 'final']}
+    estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'sgdclassifier', 'sgdregressor', 'svc', 'final']}
 
     log.info("Running Sklearn Stacking Ensemble with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs))
     log.warning("We completely ignore the requirement to stay within the time limit.")
     log.warning("We completely ignore the advice to optimize towards metric: {}.".format(config.metric))
-
 
     if is_classification:
         estimator = StackingClassifier(
             estimators=[('rf', RandomForestClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
                         ('gbm', GradientBoostingClassifier(random_state=config.seed, **estimators_params['gbm'])),
-                        ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['linear'])),
+                        ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['sgdclassifier'])),
                         # ('svc', LinearSVC(random_state=config.seed, **estimators_params['svc']))
                         ],
             # final_estimator=SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
@@ -54,11 +53,11 @@ def run(dataset, config):
         estimator = StackingRegressor(
             estimators=[('rf', RandomForestRegressor(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
                         ('gbm', GradientBoostingRegressor(random_state=config.seed, **estimators_params['gbm'])),
-                        ('linear', SGDRegressor(random_state=config.seed, **estimators_params['linear'])),
+                        ('linear', SGDRegressor(random_state=config.seed, **estimators_params['sgdregressor'])),
                         ('svc', LinearSVR(random_state=config.seed, **estimators_params['svc']))
                         ],
             # final_estimator=SGDRegressor(random_state=config.seed, **estimators_params['final']),
-            final_estimator=LinearRegression(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
+            final_estimator=LinearRegression(n_jobs=n_jobs),
             n_jobs=n_jobs,
             **training_params
         )
@@ -66,7 +65,8 @@ def run(dataset, config):
     with Timer() as training:
         estimator.fit(X_train, y_train)
 
-    predictions = estimator.predict(X_test)
+    with Timer() as predict:
+        predictions = estimator.predict(X_test)
     probabilities = estimator.predict_proba(X_test) if is_classification else None
 
     return result(output_file=config.output_predictions_file,
@@ -75,7 +75,8 @@ def run(dataset, config):
                   probabilities=probabilities,
                   target_is_encoded=is_classification,
                   models_count=len(estimator.estimators_) + 1,
-                  training_duration=training.duration)
+                  training_duration=training.duration,
+                  predict_duration=predict.duration)
 
 
 if __name__ == '__main__':
diff --git a/examples/custom/extensions/Stacking/requirements.txt b/examples/custom/extensions/Stacking/requirements.txt
index d2afe9e80..73f0cadcd 100644
--- a/examples/custom/extensions/Stacking/requirements.txt
+++ b/examples/custom/extensions/Stacking/requirements.txt
@@ -1 +1 @@
-scikit-learn==0.22.1
+scikit-learn==1.3.1
diff --git a/examples/custom/frameworks.yaml b/examples/custom/frameworks.yaml
index a68884811..e711bcbbb 100644
--- a/examples/custom/frameworks.yaml
+++ b/examples/custom/frameworks.yaml
@@ -9,15 +9,15 @@ GradientBoosting:
 
 Stacking:
   module: extensions.Stacking
-  version: '0.22.1'
+  version: '1.3.1'
   project: https://scikit-learn.org/stable/modules/ensemble.html#stacking
   params:
     _rf_params: {n_estimators: 200}
     _gbm_params: {n_estimators: 200}
-    _linear_params: {penalty: elasticnet, loss: log}
+    _sgdclassifier_params: {penalty: elasticnet, loss: log_loss}
+    _sgdregressor_params: {penalty: elasticnet}
     # _svc_params: {tol: 1e-3, max_iter: 1e5}
     # _final_params: {penalty: elasticnet, loss: log}  # sgd linear
-    _final_params: {max_iter: 1000}  # logistic/linear
 
 H2OAutoML_nightly:
   module: frameworks.H2OAutoML
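
For reference, below is a minimal standalone sketch of the stacking setup this patch moves to under scikit-learn 1.3: SGDClassifier takes loss='log_loss' (the old 'log' name is no longer accepted, matching the _sgdclassifier_params change in frameworks.yaml), and LinearRegression is built without the random_state and 'final' params it does not accept. The toy dataset, the hard-coded hyperparameters, and relying on StackingClassifier's default LogisticRegression final estimator are assumptions for illustration only; the real exec.py constructs its estimators from config.framework_params inside the benchmark harness.

# Illustrative sketch only -- not part of the patch above.
from sklearn.datasets import make_classification
from sklearn.ensemble import (GradientBoostingClassifier, RandomForestClassifier,
                              StackingClassifier)
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

# Toy data standing in for the benchmark's dataset object (assumption).
X, y = make_classification(n_samples=500, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

estimator = StackingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(n_estimators=200, n_jobs=-1, random_state=0)),
        ('gbm', GradientBoostingClassifier(n_estimators=200, random_state=0)),
        # scikit-learn 1.3 requires loss='log_loss' instead of the former loss='log'.
        ('linear', SGDClassifier(loss='log_loss', penalty='elasticnet', random_state=0)),
    ],
    n_jobs=-1,  # final_estimator left at its default (LogisticRegression)
)
estimator.fit(X_train, y_train)
print(estimator.predict_proba(X_test)[:5])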