Skip to content

Commit

Permalink
Update Stacking example to work with Py3.9 and sklearn 1.3
Browse files: browse the repository at this point in the history
  • Loading branch information
PGijsbers committed Oct 7, 2023
1 parent 11eced4 commit 3eec76a
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 11 deletions.
15 changes: 8 additions & 7 deletions examples/custom/extensions/Stacking/exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,17 @@ def run(dataset, config):

training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
n_jobs = config.framework_params.get('_n_jobs', config.cores) # useful to disable multicore, regardless of the dataset config
estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'linear', 'svc', 'final']}
estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'sgdclassifier', 'sgdregressor', 'svc', 'final']}

log.info("Running Sklearn Stacking Ensemble with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs))
log.warning("We completely ignore the requirement to stay within the time limit.")
log.warning("We completely ignore the advice to optimize towards metric: {}.".format(config.metric))


if is_classification:
estimator = StackingClassifier(
estimators=[('rf', RandomForestClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
('gbm', GradientBoostingClassifier(random_state=config.seed, **estimators_params['gbm'])),
('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['linear'])),
('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['sgdclassifier'])),
# ('svc', LinearSVC(random_state=config.seed, **estimators_params['svc']))
],
# final_estimator=SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
Expand All @@ -54,19 +53,20 @@ def run(dataset, config):
estimator = StackingRegressor(
estimators=[('rf', RandomForestRegressor(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
('gbm', GradientBoostingRegressor(random_state=config.seed, **estimators_params['gbm'])),
('linear', SGDRegressor(random_state=config.seed, **estimators_params['linear'])),
('linear', SGDRegressor(random_state=config.seed, **estimators_params['sgdregressor'])),
('svc', LinearSVR(random_state=config.seed, **estimators_params['svc']))
],
# final_estimator=SGDRegressor(random_state=config.seed, **estimators_params['final']),
final_estimator=LinearRegression(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
final_estimator=LinearRegression(n_jobs=n_jobs),
n_jobs=n_jobs,
**training_params
)

with Timer() as training:
estimator.fit(X_train, y_train)

predictions = estimator.predict(X_test)
with Timer() as predict:
predictions = estimator.predict(X_test)
probabilities = estimator.predict_proba(X_test) if is_classification else None

return result(output_file=config.output_predictions_file,
Expand All @@ -75,7 +75,8 @@ def run(dataset, config):
probabilities=probabilities,
target_is_encoded=is_classification,
models_count=len(estimator.estimators_) + 1,
training_duration=training.duration)
training_duration=training.duration,
predict_duration=predict.duration)


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion examples/custom/extensions/Stacking/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
scikit-learn==0.22.1
scikit-learn==1.3.1
6 changes: 3 additions & 3 deletions examples/custom/frameworks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ GradientBoosting:

Stacking:
module: extensions.Stacking
version: '0.22.1'
version: '1.3.1'
project: https://scikit-learn.org/stable/modules/ensemble.html#stacking
params:
_rf_params: {n_estimators: 200}
_gbm_params: {n_estimators: 200}
_linear_params: {penalty: elasticnet, loss: log}
_sgdclassifier_params: {penalty: elasticnet, loss: log_loss}
_sgdregressor_params: {penalty: elasticnet}
# _svc_params: {tol: 1e-3, max_iter: 1e5}
# _final_params: {penalty: elasticnet, loss: log_loss} # sgd linear (scikit-learn >= 1.3 only accepts `log_loss`; the old `log` spelling was removed)
_final_params: {max_iter: 1000} # logistic/linear

H2OAutoML_nightly:
module: frameworks.H2OAutoML
Expand Down

0 comments on commit 3eec76a

Please sign in to comment.