Update Stacking example to work with Py3.9 and sklearn 1.3

openml · Oct 7, 2023 · 3eec76a · 3eec76a
1 parent 11eced4
commit 3eec76a
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 11 deletions.
diff --git a/examples/custom/extensions/Stacking/exec.py b/examples/custom/extensions/Stacking/exec.py
@@ -30,18 +30,17 @@ def run(dataset, config):
 
     training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
     n_jobs = config.framework_params.get('_n_jobs', config.cores)  # useful to disable multicore, regardless of the dataset config
-    estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'linear', 'svc', 'final']}
+    estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'sgdclassifier', 'sgdregressor', 'svc', 'final']}
 
     log.info("Running Sklearn Stacking Ensemble with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs))
     log.warning("We completely ignore the requirement to stay within the time limit.")
     log.warning("We completely ignore the advice to optimize towards metric: {}.".format(config.metric))
 
-
     if is_classification:
         estimator = StackingClassifier(
             estimators=[('rf', RandomForestClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
                         ('gbm', GradientBoostingClassifier(random_state=config.seed, **estimators_params['gbm'])),
-                        ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['linear'])),
+                        ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['sgdclassifier'])),
                         # ('svc', LinearSVC(random_state=config.seed, **estimators_params['svc']))
                         ],
             # final_estimator=SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
@@ -54,19 +53,20 @@ def run(dataset, config):
         estimator = StackingRegressor(
             estimators=[('rf', RandomForestRegressor(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
                         ('gbm', GradientBoostingRegressor(random_state=config.seed, **estimators_params['gbm'])),
-                        ('linear', SGDRegressor(random_state=config.seed, **estimators_params['linear'])),
+                        ('linear', SGDRegressor(random_state=config.seed, **estimators_params['sgdregressor'])),
                         ('svc', LinearSVR(random_state=config.seed, **estimators_params['svc']))
                         ],
             # final_estimator=SGDRegressor(random_state=config.seed, **estimators_params['final']),
-            final_estimator=LinearRegression(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
+            final_estimator=LinearRegression(n_jobs=n_jobs),
             n_jobs=n_jobs,
             **training_params
         )
 
     with Timer() as training:
         estimator.fit(X_train, y_train)
 
-    predictions = estimator.predict(X_test)
+    with Timer() as predict:
+        predictions = estimator.predict(X_test)
     probabilities = estimator.predict_proba(X_test) if is_classification else None
 
     return result(output_file=config.output_predictions_file,
@@ -75,7 +75,8 @@ def run(dataset, config):
                   probabilities=probabilities,
                   target_is_encoded=is_classification,
                   models_count=len(estimator.estimators_) + 1,
-                  training_duration=training.duration)
+                  training_duration=training.duration,
+                  predict_duration=predict.duration)
 
 
 if __name__ == '__main__':

diff --git a/examples/custom/extensions/Stacking/requirements.txt b/examples/custom/extensions/Stacking/requirements.txt
@@ -1 +1 @@
-scikit-learn==0.22.1
+scikit-learn==1.3.1
diff --git a/examples/custom/frameworks.yaml b/examples/custom/frameworks.yaml
@@ -9,15 +9,15 @@ GradientBoosting:
 
 Stacking:
   module: extensions.Stacking
-  version: '0.22.1'
+  version: '1.3.1'
   project: https://scikit-learn.org/stable/modules/ensemble.html#stacking
   params:
     _rf_params: {n_estimators: 200}
     _gbm_params: {n_estimators: 200}
-    _linear_params: {penalty: elasticnet, loss: log}
+    _sgdclassifier_params: {penalty: elasticnet, loss: log_loss}
+    _sgdregressor_params: {penalty: elasticnet}
 #    _svc_params: {tol: 1e-3, max_iter: 1e5}
 #    _final_params: {penalty: elasticnet, loss: log} # sgd linear
-    _final_params: {max_iter: 1000}  # logistic/linear
 
 H2OAutoML_nightly:
   module: frameworks.H2OAutoML