From 3eec76a6e83c4d2aeceb0969bc2948c87e559fc1 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 7 Oct 2023 13:31:43 +0200
Subject: [PATCH] Update Stacking example to work with Py3.9 and sklearn 1.3

---
 examples/custom/extensions/Stacking/exec.py       | 15 ++++++++-------
 .../custom/extensions/Stacking/requirements.txt   |  2 +-
 examples/custom/frameworks.yaml                   |  6 +++---
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/examples/custom/extensions/Stacking/exec.py b/examples/custom/extensions/Stacking/exec.py
index d8c80879d..47321feb6 100644
--- a/examples/custom/extensions/Stacking/exec.py
+++ b/examples/custom/extensions/Stacking/exec.py
@@ -30,18 +30,17 @@ def run(dataset, config):
 
     training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
     n_jobs = config.framework_params.get('_n_jobs', config.cores)  # useful to disable multicore, regardless of the dataset config
-    estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'linear', 'svc', 'final']}
+    estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'sgdclassifier', 'sgdregressor', 'svc', 'final']}
 
     log.info("Running Sklearn Stacking Ensemble with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs))
     log.warning("We completely ignore the requirement to stay within the time limit.")
     log.warning("We completely ignore the advice to optimize towards metric: {}.".format(config.metric))
-
 
     if is_classification:
         estimator = StackingClassifier(
             estimators=[('rf', RandomForestClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
                         ('gbm', GradientBoostingClassifier(random_state=config.seed, **estimators_params['gbm'])),
-                        ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['linear'])),
+                        ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['sgdclassifier'])),
                         # ('svc', LinearSVC(random_state=config.seed, **estimators_params['svc']))
                         ],
             # final_estimator=SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
@@ -54,11 +53,11 @@ def run(dataset, config):
         estimator = StackingRegressor(
             estimators=[('rf', RandomForestRegressor(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
                         ('gbm', GradientBoostingRegressor(random_state=config.seed, **estimators_params['gbm'])),
-                        ('linear', SGDRegressor(random_state=config.seed, **estimators_params['linear'])),
+                        ('linear', SGDRegressor(random_state=config.seed, **estimators_params['sgdregressor'])),
                         ('svc', LinearSVR(random_state=config.seed, **estimators_params['svc']))
                         ],
             # final_estimator=SGDRegressor(random_state=config.seed, **estimators_params['final']),
-            final_estimator=LinearRegression(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
+            final_estimator=LinearRegression(n_jobs=n_jobs),
             n_jobs=n_jobs,
             **training_params
         )
@@ -66,7 +65,8 @@ def run(dataset, config):
     with Timer() as training:
         estimator.fit(X_train, y_train)
 
-    predictions = estimator.predict(X_test)
+    with Timer() as predict:
+        predictions = estimator.predict(X_test)
     probabilities = estimator.predict_proba(X_test) if is_classification else None
 
     return result(output_file=config.output_predictions_file,
@@ -75,7 +75,8 @@ def run(dataset, config):
                   probabilities=probabilities,
                   target_is_encoded=is_classification,
                   models_count=len(estimator.estimators_) + 1,
-                  training_duration=training.duration)
+                  training_duration=training.duration,
+                  predict_duration=predict.duration)
 
 
 if __name__ == '__main__':
diff --git a/examples/custom/extensions/Stacking/requirements.txt b/examples/custom/extensions/Stacking/requirements.txt
index d2afe9e80..73f0cadcd 100644
--- a/examples/custom/extensions/Stacking/requirements.txt
+++ b/examples/custom/extensions/Stacking/requirements.txt
@@ -1 +1 @@
-scikit-learn==0.22.1
+scikit-learn==1.3.1
diff --git a/examples/custom/frameworks.yaml b/examples/custom/frameworks.yaml
index a68884811..e711bcbbb 100644
--- a/examples/custom/frameworks.yaml
+++ b/examples/custom/frameworks.yaml
@@ -9,15 +9,15 @@ GradientBoosting:
 
 Stacking:
   module: extensions.Stacking
-  version: '0.22.1'
+  version: '1.3.1'
   project: https://scikit-learn.org/stable/modules/ensemble.html#stacking
   params:
     _rf_params: {n_estimators: 200}
     _gbm_params: {n_estimators: 200}
-    _linear_params: {penalty: elasticnet, loss: log}
+    _sgdclassifier_params: {penalty: elasticnet, loss: log_loss}
+    _sgdregressor_params: {penalty: elasticnet}
     # _svc_params: {tol: 1e-3, max_iter: 1e5}
     # _final_params: {penalty: elasticnet, loss: log}  # sgd linear
-    _final_params: {max_iter: 1000}  # logistic/linear
 
 H2OAutoML_nightly:
   module: frameworks.H2OAutoML
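
For reference, below is a minimal standalone sketch of the stacking setup this patch moves to under scikit-learn 1.3: SGDClassifier takes loss='log_loss' (the old 'log' name is no longer accepted, matching the _sgdclassifier_params change in frameworks.yaml), and LinearRegression is built without the random_state and 'final' params it does not accept. The toy dataset, the hard-coded hyperparameters, and relying on StackingClassifier's default LogisticRegression final estimator are assumptions for illustration only; the real exec.py constructs its estimators from config.framework_params inside the benchmark harness.

# Illustrative sketch only -- not part of the patch above.
from sklearn.datasets import make_classification
from sklearn.ensemble import (GradientBoostingClassifier, RandomForestClassifier,
                              StackingClassifier)
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

# Toy data standing in for the benchmark's dataset object (assumption).
X, y = make_classification(n_samples=500, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

estimator = StackingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(n_estimators=200, n_jobs=-1, random_state=0)),
        ('gbm', GradientBoostingClassifier(n_estimators=200, random_state=0)),
        # scikit-learn 1.3 requires loss='log_loss' instead of the former loss='log'.
        ('linear', SGDClassifier(loss='log_loss', penalty='elasticnet', random_state=0)),
    ],
    n_jobs=-1,  # final_estimator left at its default (LogisticRegression)
)
estimator.fit(X_train, y_train)
print(estimator.predict_proba(X_test)[:5])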