-
Notifications
You must be signed in to change notification settings - Fork 1
/
Tune_RF_10_2.R
79 lines (78 loc) · 2.14 KB
/
Tune_RF_10_2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#' # Random Forest Model (Samuel N Araya)
#' Model particulars: everything that identifies this run is set here.
# ***********************
#' Model type tag; used as the prefix of the saved model's file name
model.type <- "RF"
#' Model number (7.6); used as the suffix of the saved model's file name
model.number <- 76
#' Name of the training database file, read from the working directory
data.fname <- "KsatxTrain.rds"
#' Columns kept from the training table. "log.Ksat" is the response in the
#' model formula; every other column is used as a predictor.
rel.cols <- c(
  "log.Ksat",
  # 7-class texture
  "rVCOS",
  "rCOS",
  "rMS",
  "rFS",
  "rVFS",
  "rSilt",
  "rClay",
  # Bulk density
  "Db",
  # log Organic Carbon
  "logOC",
  # Particle sizes
  "d10",
  "d50",
  "logCU"
)
# ***********************
#' Load the required libraries; they must be installed beforehand.
#+ setup, include=FALSE
library(plyr)
library(caret) # platform for training
library(randomForest) # RF model
library(doMC) # Parallel computation
#'
#' Set up file directory: input is read from, and output written to, the
#' current working directory.
wDir <- getwd()
print(wDir)
#'
#' Import transformed training data table
train.dt <- readRDS(file = file.path(wDir, data.fname))
#'
#' Fail early, naming the offending columns, if a requested column is absent;
#' otherwise the subsetting step below fails with a generic error.
missing.cols <- setdiff(rel.cols, colnames(train.dt))
if (length(missing.cols) > 0) {
  stop("Columns missing from ", data.fname, ": ",
       paste(missing.cols, collapse = ", "),
       call. = FALSE)
}
#'
#' Subset by relevant predictors (columns)
train.dt <- subset(train.dt, select = rel.cols)
#' Print summary of data table. The print() call is explicit, like the other
#' outputs of this script, so the summary also appears when the file is run
#' through source(), where top-level values are not auto-printed.
print(sapply(train.dt, summary))
#'
#' Define tuning parameters
# Feature size: every column of train.dt except the response (log.Ksat).
# Counting the response as well would add an mtry candidate larger than the
# number of predictors; such a value is not a valid mtry, only repeats the
# largest valid setting, and costs a full extra round of resampling.
n.p <- ncol(train.dt) - 1L
# One candidate per possible mtry, from 1 up to the number of predictors.
rf_grid <- expand.grid(mtry = seq_len(n.p))
#'
#' Define training control methods: 10-fold cross-validation repeated 5 times,
#' with the resampled performance values stored for all candidates
#' (returnResamp = "all").
ctrl_tr <- trainControl(
  returnResamp = "all",
  method = "repeatedcv",
  repeats = 5,
  number = 10
)
#'
#' Save start time (the timer therefore includes the parallel setup below)
ptm <- proc.time()
#'
#' Set up parallel processing
ncores <- detectCores() # get available cores
# detectCores() is documented to return NA when the number of cores cannot be
# determined; fall back to a single worker rather than registering NA.
if (is.na(ncores)) {
  ncores <- 1L
}
print(ncores)
registerDoMC(ncores) # register to work on all detected cores
#'
#' Tune random forest model
# Seed the RNG immediately before train(): the cross-validation folds are
# drawn there, so seeding makes the resampling splits repeatable between runs.
# NOTE(review): reproducibility of the individual model fits inside the
# parallel workers is governed by trainControl(seeds = ...); confirm whether
# exact run-to-run equality of the fits is required.
set.seed(10)
# log.Ksat is modelled on all remaining columns of train.dt; one model is
# evaluated per mtry value in rf_grid under the resampling scheme in ctrl_tr.
# importance and ntree are passed through to randomForest(). The former
# `verbose = TRUE` was dropped: it is neither a train() nor a randomForest()
# argument and was silently ignored.
rf_fit <- train(log.Ksat ~ .,
                data = train.dt,
                method = "rf",
                tuneGrid = rf_grid,
                trControl = ctrl_tr,
                importance = TRUE,
                ntree = 5000)
#'
#' Report the time elapsed since the start time (ptm) was recorded
elapsed <- proc.time() - ptm
print(elapsed)
#'
#' Save the tuned model in the working directory; the file name is built as
#' <model.type>_<model.number>.rds
model.fname <- sprintf("%s_%s.rds", model.type, model.number)
saveRDS(rf_fit, file = file.path(wDir, model.fname))
#'
#' Exit R (called with default arguments)
quit()