-
Notifications
You must be signed in to change notification settings - Fork 180
/
Copy pathxgboost_cv_integration.py
64 lines (47 loc) · 2.4 KB
/
xgboost_cv_integration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""
Optuna example that demonstrates a pruner for XGBoost.cv.
In this example, we optimize the validation AUC of cancer detection using XGBoost.
We optimize both the choice of booster model and its hyperparameters. Throughout
training of models, a pruner observes intermediate results and stops unpromising trials.
You can run this example as follows:
$ python xgboost_cv_integration.py
"""
import optuna
import sklearn.datasets
import xgboost as xgb
def objective(trial):
    """Evaluate one hyperparameter configuration with XGBoost cross-validation.

    The breast-cancer dataset is loaded from scikit-learn, a booster type and
    its hyperparameters are sampled from ``trial``, and ``xgb.cv`` is run for
    100 boosting rounds with a pruning callback attached that watches the
    "test-auc" metric.

    Args:
        trial: The Optuna trial used to sample hyperparameters; it is also
            handed to the pruning callback.

    Returns:
        The last entry of the "test-auc-mean" column of the CV history.
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    dtrain = xgb.DMatrix(features, label=labels)

    # Sampled first so the booster-specific branches below can key off it.
    booster = trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"])

    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "booster": booster,
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
    }

    # Tree-based boosters (gbtree and dart) share these tree-growing settings.
    if booster in ("gbtree", "dart"):
        param.update(
            {
                "max_depth": trial.suggest_int("max_depth", 1, 9),
                "eta": trial.suggest_float("eta", 1e-8, 1.0, log=True),
                "gamma": trial.suggest_float("gamma", 1e-8, 1.0, log=True),
                "grow_policy": trial.suggest_categorical(
                    "grow_policy", ["depthwise", "lossguide"]
                ),
            }
        )

    # dart additionally samples its dropout-related settings.
    if booster == "dart":
        param.update(
            {
                "sample_type": trial.suggest_categorical(
                    "sample_type", ["uniform", "weighted"]
                ),
                "normalize_type": trial.suggest_categorical(
                    "normalize_type", ["tree", "forest"]
                ),
                "rate_drop": trial.suggest_float("rate_drop", 1e-8, 1.0, log=True),
                "skip_drop": trial.suggest_float("skip_drop", 1e-8, 1.0, log=True),
            }
        )

    # The callback is bound to this trial and observes the "test-auc" metric.
    callbacks = [optuna.integration.XGBoostPruningCallback(trial, "test-auc")]
    history = xgb.cv(param, dtrain, num_boost_round=100, callbacks=callbacks)

    # The score of the trial is the mean test AUC of the final recorded round.
    auc_per_round = history["test-auc-mean"]
    return auc_per_round.values[-1]
if __name__ == "__main__":
    # Maximize the objective over 100 trials, using a median pruner that is
    # configured with 5 warm-up steps.
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=5),
        direction="maximize",
    )
    study.optimize(objective, n_trials=100)

    finished_count = len(study.trials)
    print("Number of finished trials: {}".format(finished_count))

    # Summarize the best trial: its objective value and the sampled parameters.
    print("Best trial:")
    best = study.best_trial
    print(" Value: {}".format(best.value))
    print(" Params: ")
    for name, setting in best.params.items():
        print(" {}: {}".format(name, setting))