## WRAPPERS FOR BAYESIAN OPTIMIZED XGBOOSTING
## FOR USE IN THE COGNITIVE AND BRAIN HEALTH LABORATORY
##################################################################################################################
##################################################################################################################
#XGBlinear: Bayesian-optimized XGBoost regression with a linear booster (tunes alpha, lambda and eta)
XGBlinear=function(train_dat,outcome, alpharange=c(0,1), lambdarange=c(1,5),etarange=c(0.001, 0.2),nthread=4, nround=500)
{
## check required packages and install any that are missing
list.of.packages = c("doParallel","parallel","ParBayesianOptimization","xgboost","caret")
new.packages = list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages))
{
cat("The following package(s) are required and will be installed:",paste(new.packages,collapse=", "),"\n")
install.packages(new.packages)
}
folds=caret::createFolds(outcome,k=5)
bounds = list(lambda = lambdarange,alpha = alpharange,eta = etarange)
dtrain = xgboost::xgb.DMatrix(data = train_dat, label = outcome)
#set up the parallel backend used by bayesOpt
cl=parallel::makeCluster(nthread)
doParallel::registerDoParallel(cl)
parallel::clusterExport(cl,c('train_dat','outcome','folds','nround'),envir = environment())
#objective function for bayesOpt: returns the (negative) cross-validated MAE, which bayesOpt maximizes
obj_func = function(eta,lambda,alpha)
{
param = list(
eta=eta,
lambda = lambda,
alpha = alpha,
booster = "gblinear",
objective = "reg:squarederror",
eval_metric = "mae")
xgbcv = xgboost::xgb.cv(params = param,
data = xgboost::xgb.DMatrix(data = train_dat, label = outcome),
nround = nround,
folds = folds,
prediction = TRUE,
early_stopping_rounds = 5,
verbose = 0,
maximize = F)
lst = list(Score = -min(xgbcv$evaluation_log$test_mae_mean),nrounds = xgbcv$best_iteration)
return(lst)
}
bayes_out = ParBayesianOptimization::bayesOpt(FUN = obj_func, bounds = bounds,initPoints = length(bounds) + 2,parallel = T,iters.n = nthread,iters.k=nthread)
parallel::stopCluster(cl)
#combine the fixed booster settings with the optimized hyperparameters
opt_params = append(list(booster = "gblinear",objective = "reg:squarederror",eval_metric = "mae"),ParBayesianOptimization::getBestPars(bayes_out))
#rerun CV with the optimal hyperparameters to determine the best number of boosting rounds
xgbcv = xgboost::xgb.cv(params = opt_params,data=xgboost::xgb.DMatrix(data = train_dat, label = outcome),nround = nround,folds = folds,prediction = TRUE,early_stopping_rounds = 5,verbose = 0,maximize = F, nthread=nthread)
#final XGB model with optimal hyperparameters and nround
xg_mod = xgboost::xgboost(data = xgboost::xgb.DMatrix(data = train_dat, label = outcome), params = opt_params, nround = xgbcv$best_iteration, verbose = F, nthread=nthread)
return(xg_mod)
}
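
#illustrative usage sketch (commented out; 'features' and 'y' are hypothetical stand-ins for a numeric
#feature matrix and a continuous outcome vector from your own data)
# features=as.matrix(mtcars[,-1])
# y=mtcars$mpg
# mod.linear=XGBlinear(train_dat = features, outcome = y, nthread = 4)
# pred.linear=predict(mod.linear, features)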
##################################################################################################################
##################################################################################################################
#XGBtree: Bayesian-optimized XGBoost regression with a tree booster (tunes alpha, lambda, eta, max_depth, min_child_weight and subsample)
XGBtree=function(train_dat,outcome, alpharange=c(0,1), lambdarange=c(1,5),etarange=c(0.001, 0.2),max_depth = c(1L, 10L),min_child_weight = c(1, 50),subsample = c(0.1, 1),nthread=4,nround=500)
{
## check required packages and install any that are missing
list.of.packages = c("doParallel","parallel","ParBayesianOptimization","xgboost","caret")
new.packages = list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages))
{
cat("The following package(s) are required and will be installed:",paste(new.packages,collapse=", "),"\n")
install.packages(new.packages)
}
folds=caret::createFolds(outcome,k=5)
bounds = list(lambda = lambdarange,alpha = alpharange,eta = etarange, max_depth=max_depth, min_child_weight=min_child_weight,subsample=subsample)
dtrain = xgboost::xgb.DMatrix(data = train_dat, label = outcome)
#set up the parallel backend used by bayesOpt
cl = parallel::makeCluster(nthread)
doParallel::registerDoParallel(cl)
parallel::clusterExport(cl,c('train_dat','outcome','folds','nround'),envir = environment())
#objective function for bayesOpt: returns the (negative) cross-validated MAE, which bayesOpt maximizes
obj_func = function(eta,lambda,alpha,max_depth,min_child_weight,subsample)
{
param = list(
eta=eta,
lambda = lambda,
alpha = alpha,
max_depth=max_depth,
min_child_weight=min_child_weight,
subsample=subsample,
booster = "gbtree",
objective = "reg:squarederror",
eval_metric = "mae")
xgbcv =xgboost::xgb.cv(params = param,
data = xgboost::xgb.DMatrix(data = train_dat, label = outcome),
nround = nround,
folds = folds,
prediction = TRUE,
early_stopping_rounds = 5,
verbose = 0,
maximize = F)
lst = list(Score = -min(xgbcv$evaluation_log$test_mae_mean),nrounds = xgbcv$best_iteration)
return(lst)
}
bayes_out = ParBayesianOptimization::bayesOpt(FUN = obj_func, bounds = bounds,initPoints = length(bounds) + 2,parallel = T,iters.n = nthread,iters.k=nthread)
parallel::stopCluster(cl)
#combine the fixed booster settings with the optimized hyperparameters
opt_params = append(list(booster = "gbtree",objective = "reg:squarederror",eval_metric = "mae"), ParBayesianOptimization::getBestPars(bayes_out))
#rerun CV with the optimal hyperparameters to determine the best number of boosting rounds
xgbcv = xgboost::xgb.cv(params = opt_params,data=xgboost::xgb.DMatrix(data = train_dat, label = outcome),nround = nround,folds = folds,prediction = TRUE,early_stopping_rounds = 5,verbose = 0,maximize = F, nthread=nthread)
#final XGB model with optimal hyperparameters and nround
xg_mod = xgboost::xgboost(data = xgboost::xgb.DMatrix(data = train_dat, label = outcome), params = opt_params, nround = xgbcv$best_iteration, verbose = F, nthread=nthread)
return(xg_mod)
}
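
##################################################################################################################
##################################################################################################################
#illustrative usage sketch for XGBtree (commented out; hypothetical data, adapt to your own feature matrix and outcome)
# features=as.matrix(mtcars[,-1])
# y=mtcars$mpg
# mod.tree=XGBtree(train_dat = features, outcome = y, nthread = 4)
# pred.tree=predict(mod.tree, features)
# cor(pred.tree, y) #in-sample fit only; evaluate on held-out data for a meaningful accuracy estimate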