Skip to content

Commit

Permalink
Merge pull request #27 from tmastny/dev
Browse files Browse the repository at this point in the history
Merge Dev Branch
  • Loading branch information
tmastny authored May 28, 2018
2 parents 34ff68c + a34125c commit 62f9060
Show file tree
Hide file tree
Showing 24 changed files with 500 additions and 300 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
.RData
.Ruserdata
leadrboard.RDS
models_one/
inst/doc
model*.RDS
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ Imports:
crayon,
pillar,
here,
rlang
rlang,
usethis
Remotes: tidyverse/magrittr
RoxygenNote: 6.0.1.9000
VignetteBuilder: knitr
6 changes: 5 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@ S3method(format,id)
S3method(pillar_shaft,id)
S3method(print,id)
S3method(type_sum,id)
export(as_argument)
export(as_id)
export(at_last)
export(board)
export(git_ignore_models)
export(id)
export(model_list)
export(modeler)
export(oof_grab)
export(peak)
export(to_list)
export(run)
importFrom(crayon,bgGreen)
importFrom(crayon,black)
importFrom(magrittr,"%>%")
Expand Down
76 changes: 28 additions & 48 deletions R/board.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#' A tibble leaderboard for \code{caret} \code{train} objects
#'
#' This function updates and returns
#' the model leaderboard for the project. Please read the
#' the model leaderboard for the project. The function also saves a leaderboard
#' tibble in the current working directory and models to a subdirectory.
#' Please read the
#' \href{https://github.com/tmastny/leadr}{README} and the
#' introduction
#' \href{https://tmastny.github.io/leadr/articles/introduction.html}{vignette}.
Expand All @@ -10,86 +12,62 @@
#' @param model model to add to the leaderboard. If no model is supplied
#' (the default \code{null}), \code{board} returns the leaderboard tibble
#' for the project.
#' @param path globally sets the path to save models and leaderboards. By
#' default, the path is the project directory found by. For best results, the path
#' string should be constructed with \code{file.path} or
#' @param path the path to the saved model directory. The default path
#' is to a folder called \code{models} in the current working directory.
#' For best results, the path string should be constructed with \code{file.path} or
#' \href{https://github.com/krlmlr/here}{\code{here::here()}}.
#' @param dir globally sets name of directory where models are saved.
#' @param dir the name of directory where the model is saved.
#' This will be a subdirectory of the specified path. The default directory
#' is named \code{models_one}. If no argument is supplied, the model will be saved
#' in the previously specified directory. See the example below.
#' is named \code{initial}. This is commonly used to group similar models.
#' @param save whether \code{board} should save the supplied model to \code{dir}. If
#' \code{FALSE} the model will not be saved, but will be added to the leaderboard.
#' @param quiet whether \code{board} should return the leaderboard tibble to the console.
#' By default \code{quiet = FALSE} means that the tibble prints to console.
#' \code{quiet = TRUE} is useful in a \code{.Rmd} environment, where you want to add
#' @param invisible whether \code{board} should return the leaderboard tibble to the console.
#' By default \code{invisible = FALSE} means that the tibble prints to console.
#' \code{invisible = TRUE} is useful in a \code{.Rmd} environment, where you want to add
#' the model to the leaderboard without printing the tibble.
#'
#' @return \code{tibble} containing the most up-to-date leaderboard.
#' @return \code{tibble} containing the most up-to-date leaderboard. Also has the
#' side-effects of creating and/or saving \code{leadrboard.RDS} to the current
#' working directory, and saving the supplied model to the subdirectory.
#'
#' @examples
#' # add caret model to leaderboard
#' # model saved to "models_one"
#' model <- train(...)
#' leadr::board(model)
#' board(model)
#'
#' # return tibble leaderboard
#' leadr::board()
#' board()
#'
#' # save to different directory
#' ensemble <- train(...)
#' leadr::board(ensemble, dir = "ensembles_one")
#'
#' # board automatically saves to previous directory
#' # model saved to "ensembles_one"
#' ensemble2 <- train(...)
#' leadr::board(ensembles2)
#' board(ensemble, dir = "ensembles")
#'
#' @importFrom magrittr %>%
#' @export
board <- function(
model = NULL, path = NULL, dir = NULL, save = TRUE, quiet = FALSE) {

default_dir = "models_one"

if (is.null(get_path()) || (!is.null(path))) {
if (is.null(path)) {
set_path(here::here())
} else {
set_path(path)
}
}
path = get_path()

if (is.null(get_dir()) || (!is.null(dir))) {
if (is.null(dir)) {
set_dir(default_dir)
} else {
set_dir(dir)
}
}
dir = get_dir()
model = NULL, path = file.path(getwd(), "models"), dir = "initial",
save = TRUE, invisible = FALSE) {

leadrboard <- new_leadrboard()
leadrboard_path <- file.path(path, "leadrboard.RDS")
leadrboard_path <- file.path(getwd(), "leadrboard.RDS")
if (file.exists(leadrboard_path))
leadrboard <- readRDS(leadrboard_path)

if (!is.null(model)) {
model_id = nrow(leadrboard) + 1
leadrboard <- add_to(leadrboard, model, model_id, dir)
leadrboard <- add_to(leadrboard, model, model_id, dir, path)
saveRDS(leadrboard, leadrboard_path)

if (save) {
model_path = file.path(path, dir)
if (!dir.exists(model_path))
dir.create(model_path)
dir.create(model_path, recursive = TRUE)

saveRDS(model, file.path(model_path, paste0("model", model_id, ".RDS")))
}
}

if (quiet) return(invisible(leadrboard))
if (invisible) return(base::invisible(leadrboard))

leadrboard$id <- id(leadrboard$id)
leadrboard
Expand All @@ -109,11 +87,12 @@ new_leadrboard <- function() {
group = integer(),
index = list(),
tune = list(),
seeds = list()
seeds = list(),
path = character()
)
}

add_to <- function(leadrboard, model, id, dir) {
add_to <- function(leadrboard, model, id, dir, path) {
new_row = list()
new_row$rank = 1
new_row$id = id
Expand All @@ -127,8 +106,9 @@ add_to <- function(leadrboard, model, id, dir) {
new_row$num = model$control$number
new_row$group = NA
new_row$index = list(model$control$index)
new_row$tune = list(as.list(model$bestTune))
new_row$tune = list(model$bestTune)
new_row$seeds = list(model$control$seeds)
new_row$path = path
} else {
stop("leadr only supports caret train objects (so far).")
}
Expand Down
83 changes: 83 additions & 0 deletions R/ensemble_tools.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#' Return out of fold model predictions
#'
#' Given a model or list of models, this function returns the
#' out of fold predictions. These out of fold predictions can be used
#' to make stacked or blended ensembles. See the ensemble
#' \href{https://tmastny.github.io/leadr/articles/ensemble.html}{vignette}
#' for examples.
#'
#' @param models A model or list of models to get the predictions
#' @param type the results of the prediction. For classification models,
#' \code{"raw"} returns the outcome label and \code{"prob"} returns the
#' label probabilities.
#'
#' @return a tibble with one column per model and a column of the training data
#' outcomes. If \code{type = "prob"} there will be n columns per model, where
#' n is the number of labels in the outcome.
#'
#' @examples
#' oofs <- oof_grab(models)
#'
#' @importFrom magrittr %>%
#' @export
oof_grab <- function(models, type = "raw") {
if (inherits(models, "train")) models <- list(models)

agg_data <- purrr::map_dfc(models, grabber, type)
agg_data <- agg_data %>% add_observed(models[[1]])
agg_data
}

grabber <- function(model, type) {

if (is.null(model$pred)) {
stop("Out of fold predictions were not saved in the caret model. ",
"Re-run with savePredictions = 'final' or TRUE in trainControl.")
}
grabbers <- list(raw = pred_grabber, prob = prob_grabber)
grab <- grabbers[[type]]

if (is.null(grab)) stop("Not a valid type. Use raw or prob.")

pred_data <- tune_filter(model)
grab(pred_data, model)
}

prob_grabber <- function(data, model) {
columns <- as.character(unique(model$trainingData$.outcome))
if (all(!columns %in% names(data))) {
stop("Probabilities were not saved, or are not available in the caret model. ",
"Re-run with classProbs = TRUE in trainControl.")
}
tibble::as_tibble(data[orderer(data), columns])
}

pred_grabber <- function(data, model) {
tibble::as_tibble(data$pred[orderer(data)])
}

orderer <- function(data) {
order(data$rowIndex)
}

tune_filter <- function(model) {
col_names <- names(model$bestTune)
col_values <- model$bestTune
filtered_pred <- model$pred %>%
dplyr::filter(
!!!purrr::map2(
col_names, col_values,
~rlang::quo(!!rlang::sym(.x) == !!.y)
)
)
}

add_observed <- function(agg_data, model) {
outcome <- attr(model$terms, "variables")[[2]]

data <- tune_filter(model)
observed <- data$obs[orderer(data)]
agg_data <- agg_data %>%
tibble::add_column(!!outcome := observed)
}

19 changes: 1 addition & 18 deletions R/globals.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,7 @@
# package globals
.globals <- new.env(parent = emptyenv())
.globals$path <- NULL
.globals$dir <- NULL
.globals$id <- NULL

get_path <- function() {
.globals$path
}

set_path <- function(path) {
.globals$path <- path
}

get_dir <- function() {
.globals$dir
}

set_dir <- function(dir) {
.globals$dir <- dir
}

get_id <- function() {
.globals$id
}
Expand All @@ -28,3 +10,4 @@ set_id <- function(id) {
.globals$id <- id
}


Loading

0 comments on commit 62f9060

Please sign in to comment.