From f347f5fc5984b8840965ae83168187f69841b413 Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Fri, 20 Apr 2018 13:29:27 -0500 Subject: [PATCH 01/14] add function to ignore model*.RDS files from board --- .gitignore | 1 + DESCRIPTION | 3 ++- NAMESPACE | 1 + R/utils.R | 10 ++++++++++ man/git_ignore_models.Rd | 13 +++++++++++++ 5 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 R/utils.R create mode 100644 man/git_ignore_models.Rd diff --git a/.gitignore b/.gitignore index ca667db..4aee09f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ leadrboard.RDS models_one/ inst/doc +model*.RDS diff --git a/DESCRIPTION b/DESCRIPTION index c3dc1c5..187352c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,8 @@ Imports: crayon, pillar, here, - rlang + rlang, + usethis Remotes: tidyverse/magrittr RoxygenNote: 6.0.1.9000 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 05e53e0..fa5777a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ S3method(type_sum,id) export(as_id) export(at_last) export(board) +export(git_ignore_models) export(id) export(oof_grab) export(peak) diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..9165445 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,10 @@ +#' Add \code{model*.RDS} to .gitignore +#' +#' Adds file types generated by +#' \code{board} of type \code{model*.RDS} to .gitignore. Model types +#' can be very large, so you may not want to save them to a git repo. +#' +#' @export +git_ignore_models <- function() { + usethis::use_git_ignore("model*.RDS") +} diff --git a/man/git_ignore_models.Rd b/man/git_ignore_models.Rd new file mode 100644 index 0000000..b286af2 --- /dev/null +++ b/man/git_ignore_models.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{git_ignore_models} +\alias{git_ignore_models} +\title{Add \code{model*.RDS} to .gitignore} +\usage{ +git_ignore_models() +} +\description{ +Adds file types generated by +\code{board} of type \code{model*.RDS} to .gitignore. Model types +can be very large, so you may not want to save them to a git repo. +} From 684e3780a39b1d4ed1911f8f551f00aacd3caac7 Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Fri, 20 Apr 2018 14:13:10 -0500 Subject: [PATCH 02/14] removing global directory and path dependencies; untested --- NAMESPACE | 2 +- R/board.R | 35 ++++++++----------------------- R/globals.R | 19 +---------------- R/model_tools.R | 15 +++++++++---- R/peak.R | 2 +- man/board.Rd | 4 ++-- man/{to_list.Rd => model_list.Rd} | 6 +++--- 7 files changed, 28 insertions(+), 55 deletions(-) rename man/{to_list.Rd => model_list.Rd} (90%) diff --git a/NAMESPACE b/NAMESPACE index fa5777a..25e7113 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,9 +11,9 @@ export(at_last) export(board) export(git_ignore_models) export(id) +export(model_list) export(oof_grab) export(peak) -export(to_list) importFrom(crayon,bgGreen) importFrom(crayon,black) importFrom(magrittr,"%>%") diff --git a/R/board.R b/R/board.R index 2e8a6d1..cd807fd 100644 --- a/R/board.R +++ b/R/board.R @@ -48,42 +48,23 @@ #' @importFrom magrittr %>% #' @export board <- function( - model = NULL, path = NULL, dir = NULL, save = TRUE, quiet = FALSE) { - - default_dir = "models_one" - - if (is.null(get_path()) || (!is.null(path))) { - if (is.null(path)) { - set_path(here::here()) - } else { - set_path(path) - } - } - path = get_path() - - if (is.null(get_dir()) || (!is.null(dir))) { - if (is.null(dir)) { - set_dir(default_dir) - } else { - set_dir(dir) - } - } - dir = get_dir() + model = NULL, path = here::here("models"), dir = "initial", save = TRUE, + quiet = FALSE) { leadrboard <- new_leadrboard() - leadrboard_path <- file.path(path, "leadrboard.RDS") + leadrboard_path <- here::here("leadrboard.RDS") if (file.exists(leadrboard_path)) leadrboard <- readRDS(leadrboard_path) if (!is.null(model)) { model_id = nrow(leadrboard) + 1 - leadrboard <- add_to(leadrboard, model, model_id, dir) + leadrboard <- add_to(leadrboard, model, model_id, dir, path) saveRDS(leadrboard, leadrboard_path) if (save) { model_path = file.path(path, dir) if (!dir.exists(model_path)) - dir.create(model_path) + dir.create(model_path, recursive = TRUE) saveRDS(model, file.path(model_path, paste0("model", model_id, ".RDS"))) } @@ -109,11 +90,12 @@ new_leadrboard <- function() { group = integer(), index = list(), tune = list(), - seeds = list() + seeds = list(), + path = character() ) } -add_to <- function(leadrboard, model, id, dir) { +add_to <- function(leadrboard, model, id, dir, path) { new_row = list() new_row$rank = 1 new_row$id = id @@ -129,6 +111,7 @@ add_to <- function(leadrboard, model, id, dir) { new_row$index = list(model$control$index) new_row$tune = list(as.list(model$bestTune)) new_row$seeds = list(model$control$seeds) + new_row$path = path } else { stop("leadr only supports caret train objects (so far).") } diff --git a/R/globals.R b/R/globals.R index 86dde62..7b0d622 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,25 +1,7 @@ # package globals .globals <- new.env(parent = emptyenv()) -.globals$path <- NULL -.globals$dir <- NULL .globals$id <- NULL -get_path <- function() { - .globals$path -} - -set_path <- function(path) { - .globals$path <- path -} - -get_dir <- function() { - .globals$dir -} - -set_dir <- function(dir) { - .globals$dir <- dir -} - get_id <- function() { .globals$id } @@ -28,3 +10,4 @@ set_id <- function(id) { .globals$id <- id } + diff --git a/R/model_tools.R b/R/model_tools.R index 9254aad..4e429fa 100644 --- a/R/model_tools.R +++ b/R/model_tools.R @@ -96,12 +96,19 @@ add_observed <- function(agg_data, model) { #' to_list() #' #' @export -to_list <- function(leadrboard) { - purrr::map2(leadrboard$id, leadrboard$dir, get_model) +model_list <- function(leadrboard) { + purrr::pmap( + list( + leadrboard$path, + leadrboard$dir, + leadrboard$id + ), + get_model + ) } -get_model <- function(id, dir) { - model_dir <- here::here(dir) +get_model <- function(path, dir, id) { + model_dir <- file.path(path, dir) if (!dir.exists(model_dir)) { warning("The directory ", dir, " does not exist. Returning NA.") return(NA) diff --git a/R/peak.R b/R/peak.R index fac4096..b360151 100644 --- a/R/peak.R +++ b/R/peak.R @@ -101,7 +101,7 @@ return_pos <- function(models, id, window_size = 10, place) { #' @export at_last <- function(number = 1) { number <- 1:number - load_path <- file.path(get_path(), "leadrboard.RDS") + load_path <- file.path(here::here(), "leadrboard.RDS") nrow(readRDS(load_path)) - number + 1 } diff --git a/man/board.Rd b/man/board.Rd index 4dcb6f9..a9d3de6 100644 --- a/man/board.Rd +++ b/man/board.Rd @@ -4,8 +4,8 @@ \alias{board} \title{A tibble leaderboard for \code{caret} \code{train} objects} \usage{ -board(model = NULL, path = NULL, dir = NULL, save = TRUE, - quiet = FALSE) +board(model = NULL, path = here::here("models"), dir = "initial", + save = TRUE, quiet = FALSE) } \arguments{ \item{model}{model to add to the leaderboard. If no model is supplied diff --git a/man/to_list.Rd b/man/model_list.Rd similarity index 90% rename from man/to_list.Rd rename to man/model_list.Rd index 5d1dbf7..e59f253 100644 --- a/man/to_list.Rd +++ b/man/model_list.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/model_tools.R -\name{to_list} -\alias{to_list} +\name{model_list} +\alias{model_list} \title{Convert (subset) of the leaderboard tibble to a list of models} \usage{ -to_list(leadrboard) +model_list(leadrboard) } \arguments{ \item{leadrboard}{the leaderboard tibble, or a filtered verison of it From 4a85ef724046a5499c9d2df7efdb60ae6e57ceb4 Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Fri, 20 Apr 2018 14:34:08 -0500 Subject: [PATCH 03/14] to_list renamed and now save model names in list. closes #17 --- .gitignore | 1 - R/model_tools.R | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 4aee09f..6705e60 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,5 @@ .RData .Ruserdata leadrboard.RDS -models_one/ inst/doc model*.RDS diff --git a/R/model_tools.R b/R/model_tools.R index 4e429fa..40be67f 100644 --- a/R/model_tools.R +++ b/R/model_tools.R @@ -97,16 +97,23 @@ add_observed <- function(agg_data, model) { #' #' @export model_list <- function(leadrboard) { - purrr::pmap( - list( - leadrboard$path, - leadrboard$dir, - leadrboard$id - ), - get_model + model_locations <- list( + leadrboard$path, + leadrboard$dir, + leadrboard$id ) + models <- purrr::pmap(model_locations, get_model) + + model_ids <- list( + leadrboard$model, + leadrboard$id + ) + model_names <- purrr::pmap(model_ids, paste, sep = "_") + names(models) <- model_names + models } + get_model <- function(path, dir, id) { model_dir <- file.path(path, dir) if (!dir.exists(model_dir)) { From 2fca552192d5cc9d923b99fa464ff701f1c10685 Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Fri, 20 Apr 2018 15:06:44 -0500 Subject: [PATCH 04/14] tests new path and directory structure. closes #12 --- tests/testthat/test-aaa-directory.R | 71 ++++++++--------------------- 1 file changed, 18 insertions(+), 53 deletions(-) diff --git a/tests/testthat/test-aaa-directory.R b/tests/testthat/test-aaa-directory.R index 74a79d5..ead206a 100644 --- a/tests/testthat/test-aaa-directory.R +++ b/tests/testthat/test-aaa-directory.R @@ -1,6 +1,7 @@ context("directory") library(caret) +library(dplyr) test_that("board can save models to a different directory", { directory = "model_test" @@ -8,69 +9,33 @@ test_that("board can save models to a different directory", { model <- train(Species ~ ., data = iris, method = 'rf') board(model, dir = directory) - dir_path = here::here(directory) - expect_true(dir.exists(dir_path)) + model_root <- board() %>% + filter(id == at_last()) %>% + .$path - path_to_file <- file.path(dir_path, "model1.RDS") - expect_true(file.exists(path_to_file)) -}) - -test_that("board automatically saved to previous directory", { - model <- train(Species ~ ., data = iris, method = 'rf') - board(model) - - directory = "model_test" - dir_path = here::here(directory) - path_to_file <- file.path(dir_path, "model2.RDS") - expect_true(file.exists(path_to_file)) - - unlink(dir_path, recursive = TRUE) - unlink(file.path(here::here(), "leadrboard.RDS")) -}) - -test_that("board leaderboard can exist in subdirectory of root", { - # clean up - leadr:::set_path(NULL) - leadr:::set_dir(NULL) - - new_sub <- "new_sub" - new_path <- here::here(new_sub) - dir.create(new_path) + model_dir <- file.path(model_root, directory) + expect_true(dir.exists(model_dir)) - model <- train(Species ~ ., data = iris, method = 'rf') - board(model, new_path) - - leadrboard_path <- file.path(new_path, "leadrboard.RDS") - expect_true(file.exists(leadrboard_path)) - - path_to_model <- file.path(new_path, "models_one", "model1.RDS") + path_to_model <- file.path(model_dir, "model1.RDS") expect_true(file.exists(path_to_model)) }) -test_that("board saves model to previous path", { - model <- train(Species ~ ., data = iris, method = 'rf') - board(model) - - new_sub <- "new_sub" - new_path <- here::here(new_sub) - path_to_model <- file.path(new_path, "models_one", "model2.RDS") - expect_true(file.exists(path_to_model)) -}) - -test_that("board can add new folder to new path", { - new_dir = "new_dir" +test_that("board can have model directory in non-root folder", { + not_root <- here::here("not_root_dir") + new_path <- file.path(not_root, "new_models") model <- train(Species ~ ., data = iris, method = 'rf') - board(model, dir = new_dir) + board(model, path = new_path) + + saved_path <- board() %>% + filter(id == at_last()) %>% + .$path + expect_equal(saved_path, new_path) - new_sub <- "new_sub" - new_path <- here::here(new_sub) - path_to_model <- file.path(new_path, new_dir, "model3.RDS") + path_to_model <- file.path(new_path, "initial", "model2.RDS") expect_true(file.exists(path_to_model)) - unlink(new_path, recursive = TRUE) - leadr:::set_path(NULL) - leadr:::set_dir(NULL) + unlink(not_root, recursive = TRUE) }) From fd2a5d103d714fb9d6905c9968bdfa8da8597f26 Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Fri, 20 Apr 2018 16:30:02 -0500 Subject: [PATCH 05/14] tested changes from previous commit --- tests/testthat/test-aaa-directory.R | 6 ++++-- tests/testthat/test-board.R | 24 +++++++++++++++++---- tests/testthat/test-ppp-oof-grab.R | 32 ++++++++++++++++++++-------- tests/testthat/test-zzw-model-list.R | 30 ++++++++++++++++++++++++++ tests/testthat/test-zzw-to-list.R | 30 -------------------------- tests/testthat/test-zzz-clean-up.R | 6 +++--- 6 files changed, 80 insertions(+), 48 deletions(-) create mode 100644 tests/testthat/test-zzw-model-list.R delete mode 100644 tests/testthat/test-zzw-to-list.R diff --git a/tests/testthat/test-aaa-directory.R b/tests/testthat/test-aaa-directory.R index ead206a..6e9915d 100644 --- a/tests/testthat/test-aaa-directory.R +++ b/tests/testthat/test-aaa-directory.R @@ -16,7 +16,8 @@ test_that("board can save models to a different directory", { model_dir <- file.path(model_root, directory) expect_true(dir.exists(model_dir)) - path_to_model <- file.path(model_dir, "model1.RDS") + model_name <- paste0("model", at_last(), ".RDS") + path_to_model <- file.path(model_dir, model_name) expect_true(file.exists(path_to_model)) }) @@ -32,7 +33,8 @@ test_that("board can have model directory in non-root folder", { .$path expect_equal(saved_path, new_path) - path_to_model <- file.path(new_path, "initial", "model2.RDS") + model_name <- paste0("model", at_last(), ".RDS") + path_to_model <- file.path(new_path, "initial", model_name) expect_true(file.exists(path_to_model)) unlink(not_root, recursive = TRUE) diff --git a/tests/testthat/test-board.R b/tests/testthat/test-board.R index daa98ee..2dd57ba 100644 --- a/tests/testthat/test-board.R +++ b/tests/testthat/test-board.R @@ -23,10 +23,18 @@ test_that("Model is saved to directory", { trControl = control ) - leadr::board(model) + board(model) expect_true(file.exists(here::here("leadrboard.RDS"))) - expect_true(file.exists(here::here("models_one", "model1.RDS"))) + + path_dir <- board() %>% + filter(id == at_last()) %>% + select(path, dir) %>% + unlist(., use.names = FALSE) + + path_to_model <- file.path(path_dir[1], path_dir[2], + paste0("model", at_last(), ".RDS")) + expect_true(file.exists(path_to_model)) }) test_that("Next model", { @@ -47,10 +55,18 @@ test_that("Next model", { trControl = control ) - leadr::board(model) + board(model) expect_true(file.exists(here::here("leadrboard.RDS"))) - expect_true(file.exists(here::here("models_one", "model2.RDS"))) + + path_dir <- board() %>% + filter(id == at_last()) %>% + select(path, dir) %>% + unlist(., use.names = FALSE) + + path_to_model <- file.path(path_dir[1], path_dir[2], + paste0("model", at_last(), ".RDS")) + expect_true(file.exists(path_to_model)) }) diff --git a/tests/testthat/test-ppp-oof-grab.R b/tests/testthat/test-ppp-oof-grab.R index 3644777..e448d21 100644 --- a/tests/testthat/test-ppp-oof-grab.R +++ b/tests/testthat/test-ppp-oof-grab.R @@ -3,8 +3,12 @@ context("oof") library(caret) test_that("oof_grab works with one model or a list", { - m1 <- readRDS(here::here("models_one", "model1.RDS")) - m2 <- readRDS(here::here("models_one", "model2.RDS")) + models <- board() %>% + filter(id %in% c(1, 2)) %>% + model_list() + + m1 <- models[[1]] + m2 <- models[[2]] m1_oof <- oof_grab(m1) listed_oof <- oof_grab(list(m1, m2)) @@ -14,26 +18,36 @@ test_that("oof_grab works with one model or a list", { }) test_that("oof_grab returns identical outcomes as training data", { - m1 <- readRDS(here::here("models_one", "model1.RDS")) - m1_oof <- oof_grab(m1) + m1 <- board() %>% + filter(id == 1) %>% + model_list() %>% + .[[1]] + m1_oof <- oof_grab(m1) expect_identical(m1_oof$Species, iris$Species) }) test_that("oof_grab handles probabilities", { - m1 <- readRDS(here::here("models_one", "model1.RDS")) - m1_oof <- oof_grab(m1, type = 'prob') + models <- board() %>% + filter(id %in% c(1, 2)) %>% + model_list() + m1 <- models[[1]] + m2 <- models[[2]] + + m1_oof <- oof_grab(m1, type = 'prob') expect_equal(length(m1_oof), length(unique(iris$Species)) + 1) - m2 <- readRDS(here::here("models_one", "model2.RDS")) listed_oof <- oof_grab(list(m1, m2), type = 'prob') - expect_equal(length(listed_oof), length(unique(iris$Species)) * 2 + 1) }) test_that("oof_grab throws an error for invalid type", { - m1 <- readRDS(here::here("models_one", "model1.RDS")) + m1 <- board() %>% + filter(id == 1) %>% + model_list() %>% + .[[1]] + expect_error(m1_oof(m1, type = "wrong_type")) }) diff --git a/tests/testthat/test-zzw-model-list.R b/tests/testthat/test-zzw-model-list.R new file mode 100644 index 0000000..3330e9f --- /dev/null +++ b/tests/testthat/test-zzw-model-list.R @@ -0,0 +1,30 @@ +context("model_list") + + +test_that("model_list returns the correct models", { + filtered_board <- board() %>% + dplyr::filter(group == 1) + + model_list <- filtered_board %>% model_list() + expect_identical( + model_list[[1]]$control$seeds, + filtered_board$seeds[[1]] + ) + expect_identical( + model_list[[2]]$control$seeds, + filtered_board$seeds[[2]] + ) +}) + +test_that("model_list returns a warning if directory or model doesn't exist", { + filtered_board <- board() %>% + dplyr::filter(group == 1) + + filtered_board$id <- c(100, 101) + filtered_board$dir <- c("initial", "not_exist") + + expect_warning(model_list(filtered_board[1,]), "The file *") + expect_warning(model_list(filtered_board[2,]), "The directory *") +}) + + diff --git a/tests/testthat/test-zzw-to-list.R b/tests/testthat/test-zzw-to-list.R deleted file mode 100644 index a47d452..0000000 --- a/tests/testthat/test-zzw-to-list.R +++ /dev/null @@ -1,30 +0,0 @@ -context("to_list") - - -test_that("to_list returns the correct models", { - filtered_board <- board() %>% - dplyr::filter(group == 1) - - model_list <- filtered_board %>% to_list() - expect_identical( - model_list[[1]]$control$seeds, - filtered_board$seeds[[1]] - ) - expect_identical( - model_list[[2]]$control$seeds, - filtered_board$seeds[[2]] - ) -}) - -test_that("to_list returns a warning if directory or model doesn't exist", { - filtered_board <- board() %>% - dplyr::filter(group == 1) - - filtered_board$id <- c(100, 101) - filtered_board$dir <- c("models_one", "not_exist") - - expect_warning(to_list(filtered_board[1,]), "The file *") - expect_warning(to_list(filtered_board[2,]), "The directory *") -}) - - diff --git a/tests/testthat/test-zzz-clean-up.R b/tests/testthat/test-zzz-clean-up.R index f607a5b..9a5bf0d 100644 --- a/tests/testthat/test-zzz-clean-up.R +++ b/tests/testthat/test-zzz-clean-up.R @@ -1,12 +1,12 @@ context("clean-up") test_that("Clean-up successful", { - # skip("save some models to work with") + #skip("save some models to work with") - unlink(here::here("models_one"), recursive = TRUE) + unlink(here::here("models"), recursive = TRUE) unlink(here::here("leadrboard.RDS")) - expect_false(dir.exists(here::here("models_one"))) + expect_false(dir.exists(here::here("models"))) expect_false(file.exists(here::here("leadrboard.RDS"))) }) From 6022e29d47097f5b90e0a5a95160d4c128e2805a Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Fri, 20 Apr 2018 20:26:44 -0500 Subject: [PATCH 06/14] replaced paste0 with tidy eval solution. closes #22 --- R/model_tools.R | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/R/model_tools.R b/R/model_tools.R index 40be67f..929e821 100644 --- a/R/model_tools.R +++ b/R/model_tools.R @@ -62,11 +62,16 @@ orderer <- function(data) { save_filter <- function(model) { if (nrow(model$pred) != nrow(model$trainingData)) { - column_names <- names(model$bestTune) - column_values <- model$bestTune - filtered <- model$pred %>% - dplyr::filter(!!rlang::parse_expr(paste(column_names, "==", shQuote(column_values), collapse = "&"))) - return(filtered) + col_names <- names(model$bestTune) + col_values <- model$bestTune + filtered_pred <- model$pred %>% + dplyr::filter( + !!!purrr::map2( + col_names, col_values, + ~rlang::quo(!!rlang::sym(.x) == !!.y) + ) + ) + return(filtered_pred) } model$pred } From ef7b8b15a4e6a26c92726f5958122a527ddb7195 Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Fri, 20 Apr 2018 20:41:43 -0500 Subject: [PATCH 07/14] cleaned up functions in #22 --- R/model_tools.R | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/R/model_tools.R b/R/model_tools.R index 929e821..a58cda9 100644 --- a/R/model_tools.R +++ b/R/model_tools.R @@ -39,7 +39,7 @@ grabber <- function(model, type) { if (is.null(grab)) stop("Not a valid type. Use raw or prob.") - pred_data <- save_filter(model) + pred_data <- tune_filter(model) grab(pred_data, model) } @@ -60,26 +60,22 @@ orderer <- function(data) { order(data$rowIndex) } -save_filter <- function(model) { - if (nrow(model$pred) != nrow(model$trainingData)) { - col_names <- names(model$bestTune) - col_values <- model$bestTune - filtered_pred <- model$pred %>% - dplyr::filter( - !!!purrr::map2( - col_names, col_values, - ~rlang::quo(!!rlang::sym(.x) == !!.y) - ) +tune_filter <- function(model) { + col_names <- names(model$bestTune) + col_values <- model$bestTune + filtered_pred <- model$pred %>% + dplyr::filter( + !!!purrr::map2( + col_names, col_values, + ~rlang::quo(!!rlang::sym(.x) == !!.y) ) - return(filtered_pred) - } - model$pred + ) } add_observed <- function(agg_data, model) { outcome <- attr(model$terms, "variables")[[2]] - data <- save_filter(model) + data <- tune_filter(model) observed <- data$obs[orderer(data)] agg_data <- agg_data %>% tibble::add_column(!!outcome := observed) From 4ceb3f83194832d8238216d739bcd057b15e3c3e Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Fri, 20 Apr 2018 22:33:18 -0500 Subject: [PATCH 08/14] updated documentation based on new changes --- R/board.R | 22 ++++++++-------------- R/model_tools.R | 11 ++++++----- man/board.Rd | 22 ++++++++-------------- man/model_list.Rd | 9 +++++---- man/oof_grab.Rd | 2 +- 5 files changed, 28 insertions(+), 38 deletions(-) diff --git a/R/board.R b/R/board.R index cd807fd..496aa3d 100644 --- a/R/board.R +++ b/R/board.R @@ -10,14 +10,13 @@ #' @param model model to add to the leaderboard. If no model is supplied #' (the default \code{null}), \code{board} returns the leaderboard tibble #' for the project. -#' @param path globally sets the path to save models and leaderboards. By -#' default, the path is the project directory found by. For best results, the path +#' @param path the path to the saved model directory. By +#' default, the path is the project root. For best results, the path #' string should be constructed with \code{file.path} or #' \href{https://github.com/krlmlr/here}{\code{here::here()}}. -#' @param dir globally sets name of directory where models are saved. +#' @param dir the name of directory where the model is saved. #' This will be a subdirectory of the specified path. The default directory -#' is named \code{models_one}. If no argument is supplied, the model will be saved -#' in the previously specified directory. See the example below. +#' is named \code{initial}. This is commonly used to group similar models. #' @param save whether \code{board} should save the supplied model to \code{dir}. If #' \code{FALSE} the model will not be saved, but will be added to the leaderboard. #' @param quiet whether \code{board} should return the leaderboard tibble to the console. @@ -29,21 +28,16 @@ #' #' @examples #' # add caret model to leaderboard -#' # model saved to "models_one" +#' # model saved to "file.path("models", "initial")" #' model <- train(...) -#' leadr::board(model) +#' board(model) #' #' # return tibble leaderboard -#' leadr::board() +#' board() #' #' # save to different directory #' ensemble <- train(...) -#' leadr::board(ensemble, dir = "ensembles_one") -#' -#' # board automatically saves to previous directory -#' # model saved to "ensembles_one" -#' ensemble2 <- train(...) -#' leadr::board(ensembles2) +#' board(ensemble, dir = "ensembles") #' #' @importFrom magrittr %>% #' @export diff --git a/R/model_tools.R b/R/model_tools.R index a58cda9..1a52d3b 100644 --- a/R/model_tools.R +++ b/R/model_tools.R @@ -16,7 +16,7 @@ #' n is the number of labels in the outcome. #' #' @examples -#' oofs <- oof_grab(model_list) +#' oofs <- oof_grab(models) #' #' @importFrom magrittr %>% #' @export @@ -81,20 +81,21 @@ add_observed <- function(agg_data, model) { tibble::add_column(!!outcome := observed) } -#' Convert (subset) of the leaderboard tibble to a list of models +#' Convert the leaderboard tibble to a list of models #' #' Given a possibly filtered leaderboard tibble from \code{\link{board}}, -#' \code{to_list} returns every model in a list. +#' \code{model_list} returns every model in a list. Each entry in the +#' list in named by the model method and id. #' #' @param leadrboard the leaderboard tibble, or a filtered verison of it #' from \code{\link{board}} #' -#' @return a list of caret models (\code{train} objects) +#' @return a named list of caret models (\code{train} objects) #' #' @examples #' model_list <- board() %>% #' filter(group == 1) %>% -#' to_list() +#' model_list() #' #' @export model_list <- function(leadrboard) { diff --git a/man/board.Rd b/man/board.Rd index a9d3de6..ed74d96 100644 --- a/man/board.Rd +++ b/man/board.Rd @@ -12,15 +12,14 @@ board(model = NULL, path = here::here("models"), dir = "initial", (the default \code{null}), \code{board} returns the leaderboard tibble for the project.} -\item{path}{globally sets the path to save models and leaderboards. By -default, the path is the project directory found by. For best results, the path +\item{path}{the path to the saved model directory. By +default, the path is the project root. For best results, the path string should be constructed with \code{file.path} or \href{https://github.com/krlmlr/here}{\code{here::here()}}.} -\item{dir}{globally sets name of directory where models are saved. +\item{dir}{the name of directory where the model is saved. This will be a subdirectory of the specified path. The default directory -is named \code{models_one}. If no argument is supplied, the model will be saved -in the previously specified directory. See the example below.} +is named \code{initial}. This is commonly used to group similar models.} \item{save}{whether \code{board} should save the supplied model to \code{dir}. If \code{FALSE} the model will not be saved, but will be added to the leaderboard.} @@ -42,20 +41,15 @@ introduction } \examples{ # add caret model to leaderboard -# model saved to "models_one" +# model saved to "file.path("models", "initial")" model <- train(...) -leadr::board(model) +board(model) # return tibble leaderboard -leadr::board() +board() # save to different directory ensemble <- train(...) -leadr::board(ensemble, dir = "ensembles_one") - -# board automatically saves to previous directory -# model saved to "ensembles_one" -ensemble2 <- train(...) -leadr::board(ensembles2) +board(ensemble, dir = "ensembles") } diff --git a/man/model_list.Rd b/man/model_list.Rd index e59f253..74a4f80 100644 --- a/man/model_list.Rd +++ b/man/model_list.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/model_tools.R \name{model_list} \alias{model_list} -\title{Convert (subset) of the leaderboard tibble to a list of models} +\title{Convert the leaderboard tibble to a list of models} \usage{ model_list(leadrboard) } @@ -11,15 +11,16 @@ model_list(leadrboard) from \code{\link{board}}} } \value{ -a list of caret models (\code{train} objects) +a named list of caret models (\code{train} objects) } \description{ Given a possibly filtered leaderboard tibble from \code{\link{board}}, -\code{to_list} returns every model in a list. +\code{model_list} returns every model in a list. Each entry in the +list in named by the model method and id. } \examples{ model_list <- board() \%>\% filter(group == 1) \%>\% - to_list() + model_list() } diff --git a/man/oof_grab.Rd b/man/oof_grab.Rd index 389a0b9..4163afa 100644 --- a/man/oof_grab.Rd +++ b/man/oof_grab.Rd @@ -26,6 +26,6 @@ to make stacked or blended ensembles. See the ensemble for examples. } \examples{ -oofs <- oof_grab(model_list) +oofs <- oof_grab(models) } From f210ef50be2880261b216faf355bf551114c78d4 Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Sat, 21 Apr 2018 15:05:50 -0500 Subject: [PATCH 09/14] change directory tree in README --- README.Rmd | 7 ++++--- README.md | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/README.Rmd b/README.Rmd index 2c18686..b7483be 100644 --- a/README.Rmd +++ b/README.Rmd @@ -65,10 +65,11 @@ leadr::board(model) . ├── iris.Rproj ├── leadrboard.RDS -└── models_one - └── model1.RDS +└── models + └── initial + └── model1.RDS ``` -At the project root, `board` saves the leaderboard tibble as a `.RDS` file and creates a subdirectory (named `/models_one` by default) to save all the models. All future models passed to `board` will be added to the leaderboard and saved in the directory, unless otherwise specified. +By default, `board` saves the leaderboard tibble as a `.RDS` file at the project root and creates the `models` directory. Within `models`, each `caret` model is saved in a subdirectory and named in the order they were ran. ## Interactive diff --git a/README.md b/README.md index 4e81b33..6151d46 100644 --- a/README.md +++ b/README.md @@ -53,10 +53,11 @@ leadr::board(model) . ├── iris.Rproj ├── leadrboard.RDS - └── models_one - └── model1.RDS + └── models + └── initial + └── model1.RDS -At the project root, `board` saves the leaderboard tibble as a `.RDS` file and creates a subdirectory (named `/models_one` by default) to save all the models. All future models passed to `board` will be added to the leaderboard and saved in the directory, unless otherwise specified. +By default, `board` saves the leaderboard tibble as a `.RDS` file at the project root and creates a directory `models`. Within `models`, each `caret` model is saved in a subdirectory and named in the order they were ran. Interactive ----------- From ee1cc6ce22c1431392ad0a93530a144d25952011 Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Sun, 22 Apr 2018 19:43:09 -0500 Subject: [PATCH 10/14] implements #21. Still need example documentation --- NAMESPACE | 3 + R/board.R | 2 +- R/ensemble_tools.R | 83 +++++++++++++++++++++ R/model_tools.R | 178 ++++++++++++++++++++++++--------------------- man/as_argument.Rd | 29 ++++++++ man/modeler.Rd | 34 +++++++++ man/oof_grab.Rd | 2 +- man/run.Rd | 34 +++++++++ 8 files changed, 280 insertions(+), 85 deletions(-) create mode 100644 R/ensemble_tools.R create mode 100644 man/as_argument.Rd create mode 100644 man/modeler.Rd create mode 100644 man/run.Rd diff --git a/NAMESPACE b/NAMESPACE index 25e7113..ef38be5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,14 +6,17 @@ S3method(format,id) S3method(pillar_shaft,id) S3method(print,id) S3method(type_sum,id) +export(as_argument) export(as_id) export(at_last) export(board) export(git_ignore_models) export(id) export(model_list) +export(modeler) export(oof_grab) export(peak) +export(run) importFrom(crayon,bgGreen) importFrom(crayon,black) importFrom(magrittr,"%>%") diff --git a/R/board.R b/R/board.R index 496aa3d..8aaecbf 100644 --- a/R/board.R +++ b/R/board.R @@ -103,7 +103,7 @@ add_to <- function(leadrboard, model, id, dir, path) { new_row$num = model$control$number new_row$group = NA new_row$index = list(model$control$index) - new_row$tune = list(as.list(model$bestTune)) + new_row$tune = list(model$bestTune) new_row$seeds = list(model$control$seeds) new_row$path = path } else { diff --git a/R/ensemble_tools.R b/R/ensemble_tools.R new file mode 100644 index 0000000..39f5bfa --- /dev/null +++ b/R/ensemble_tools.R @@ -0,0 +1,83 @@ +#' Return out of fold model predictions +#' +#' Given a model or list of models, this function returns the +#' out of fold predictions. These out of fold predictions can be used +#' to make stacked or blended ensembles. See the ensemble +#' \href{https://tmastny.github.io/leadr/articles/ensemble.html}{vignette} +#' for examples. +#' +#' @param models A model or list of models to get the predictions +#' @param type the results of the prediction. For classification models, +#' \code{"raw"} returns the outcome label and \code{"prob"} returns the +#' label probabilities. +#' +#' @return a tibble with one column per model and a column of the training data +#' outcomes. If \code{type = "prob"} there will be n columns per model, where +#' n is the number of labels in the outcome. +#' +#' @examples +#' oofs <- oof_grab(models) +#' +#' @importFrom magrittr %>% +#' @export +oof_grab <- function(models, type = "raw") { + if (inherits(models, "train")) models <- list(models) + + agg_data <- purrr::map_dfc(models, grabber, type) + agg_data <- agg_data %>% add_observed(models[[1]]) + agg_data +} + +grabber <- function(model, type) { + + if (is.null(model$pred)) { + stop("Out of fold predictions were not saved in the caret model. ", + "Re-run with savePredictions = 'final' or TRUE in trainControl.") + } + grabbers <- list(raw = pred_grabber, prob = prob_grabber) + grab <- grabbers[[type]] + + if (is.null(grab)) stop("Not a valid type. Use raw or prob.") + + pred_data <- tune_filter(model) + grab(pred_data, model) +} + +prob_grabber <- function(data, model) { + columns <- as.character(unique(model$trainingData$.outcome)) + if (all(!columns %in% names(data))) { + stop("Probabilities were not saved, or are not available in the caret model. ", + "Re-run with classProbs = TRUE in trainControl.") + } + tibble::as_tibble(data[orderer(data), columns]) +} + +pred_grabber <- function(data, model) { + tibble::as_tibble(data$pred[orderer(data)]) +} + +orderer <- function(data) { + order(data$rowIndex) +} + +tune_filter <- function(model) { + col_names <- names(model$bestTune) + col_values <- model$bestTune + filtered_pred <- model$pred %>% + dplyr::filter( + !!!purrr::map2( + col_names, col_values, + ~rlang::quo(!!rlang::sym(.x) == !!.y) + ) + ) +} + +add_observed <- function(agg_data, model) { + outcome <- attr(model$terms, "variables")[[2]] + + data <- tune_filter(model) + observed <- data$obs[orderer(data)] + agg_data <- agg_data %>% + tibble::add_column(!!outcome := observed) +} + diff --git a/R/model_tools.R b/R/model_tools.R index 1a52d3b..b51a497 100644 --- a/R/model_tools.R +++ b/R/model_tools.R @@ -1,86 +1,3 @@ -#' Return out of fold model predictions -#' -#' Given a model or list of models, this function returns the -#' out of fold predictions. These out of fold predictions can be used -#' to make stacked or blended ensembles. See the ensemble -#' \href{https://tmastny.github.io/leadr/articles/ensemble.html}{vignette} -#' for examples. -#' -#' @param models A model or list of models to get the predictions -#' @param type the results of the prediction. For classification models, -#' \code{"raw"} returns the outcome label and \code{"prob"} returns the -#' label probabilities. -#' -#' @return a tibble with one column per model and a column of the training data -#' outcomes. If \code{type = "prob"} there will be n columns per model, where -#' n is the number of labels in the outcome. -#' -#' @examples -#' oofs <- oof_grab(models) -#' -#' @importFrom magrittr %>% -#' @export -oof_grab <- function(models, type = "raw") { - if (inherits(models, "train")) models <- list(models) - - agg_data <- purrr::map_dfc(models, grabber, type) - agg_data <- agg_data %>% add_observed(models[[1]]) - agg_data -} - -grabber <- function(model, type) { - - if (is.null(model$pred)) { - stop("Out of fold predictions were not saved in the caret model. ", - "Re-run with savePredictions = 'final' or TRUE in trainControl.") - } - grabbers <- list(raw = pred_grabber, prob = prob_grabber) - grab <- grabbers[[type]] - - if (is.null(grab)) stop("Not a valid type. Use raw or prob.") - - pred_data <- tune_filter(model) - grab(pred_data, model) -} - -prob_grabber <- function(data, model) { - columns <- as.character(unique(model$trainingData$.outcome)) - if (all(!columns %in% names(data))) { - stop("Probabilities were not saved, or are not available in the caret model. ", - "Re-run with classProbs = TRUE in trainControl.") - } - tibble::as_tibble(data[orderer(data), columns]) -} - -pred_grabber <- function(data, model) { - tibble::as_tibble(data$pred[orderer(data)]) -} - -orderer <- function(data) { - order(data$rowIndex) -} - -tune_filter <- function(model) { - col_names <- names(model$bestTune) - col_values <- model$bestTune - filtered_pred <- model$pred %>% - dplyr::filter( - !!!purrr::map2( - col_names, col_values, - ~rlang::quo(!!rlang::sym(.x) == !!.y) - ) - ) -} - -add_observed <- function(agg_data, model) { - outcome <- attr(model$terms, "variables")[[2]] - - data <- tune_filter(model) - observed <- data$obs[orderer(data)] - agg_data <- agg_data %>% - tibble::add_column(!!outcome := observed) -} - #' Convert the leaderboard tibble to a list of models #' #' Given a possibly filtered leaderboard tibble from \code{\link{board}}, @@ -97,6 +14,7 @@ add_observed <- function(agg_data, model) { #' filter(group == 1) %>% #' model_list() #' +#' @importFrom magrittr %>% #' @export model_list <- function(leadrboard) { model_locations <- list( @@ -130,3 +48,97 @@ get_model <- function(path, dir, id) { } readRDS(file_path) } + +#' Build list of model meta-data +#' +#' Extracts the model meta-data from \code{\link{board}} into a list. +#' This data can be used to exactly reproduce the model. Can be passed +#' to \code{\link{run}}. +#' +#' @param leadrboard the leaderboard tibble, or a filtered verison of it +#' from \code{\link{board}} +#' +#' @return a named list of model meta-data that can be based to a function +#' that builds a caret \code{train} model +#' +#' @examples +#' parameters <- board() %>% +#' filter(id == 1) %>% +#' as_argument() +#' +#' run(modeler, data, parameters) +#' +#' @export +as_argument <- function(leadrboard) { + leadrboard %>% + select(method, num, index, seeds, method, model, tune) %>% + as.list() %>% + purrr::map(~.[[1]]) +} + +#' Run function taking the model meta-data +#' +#' Runs a function that takes a data to train on and a +#' list of parameters. +#' +#' @param modeler user created function that is a wrapper around +#' caret \code{train} +#' +#' @param data training data for the caret \code{train} function +#' +#' @param parameters parameters that go into the modeler wrapper. If +#' a previous model is being rerun, use \code{\link{as_argument}} +#' to extract model meta-data from the \code{\link{board}} +#' +#' @return returns the return value of the \code{modeler} function +#' supplied, which should be a caret \code{train} object. +#' +#' @examples +#' parameters <- board() %>% +#' filter(id == 1) %>% +#' as_argument() +#' +#' run(modeler, data, parameters) +#' +#' @export +run <- function(modeler, data, parameters) { + do.call(modeler, c(list(data = data), parameters)) +} + +#' Wrapper around caret \code{train} +#' +#' This is a wrapper around caret \code{train} that accepts the +#' model meta-data from \code{\link{board}}. This function is used +#' to exactly reproduce models in the leader board, or run +#' new models. +#' +#' The parameters correspond to the arguments in caret's +#' \code{trainControl} and \code{train} functions. +#' +#' The source code for this function is also an example how to +#' create your own modeler function for your own needs. +#' +#' Note: you must manually load the caret package to use this function. +#' caret is not a dependency of leadr. +#' +#' @return a caret \code{train} object +#' +#' @examples +#' library(caret) +#' library(leadr) +#' modeler(iris, model = "rf") +#' +#' @export +modeler <- function(data, method = "cv", num = 5, + index = NULL, seeds = NA, model, + tune = NULL) { + control <- trainControl(method = method, number = num, + savePredictions = 'final', index = index) + train( + Species ~ ., + data = data, + method = model, + trControl = control, + tuneGrid = tune + ) +} diff --git a/man/as_argument.Rd b/man/as_argument.Rd new file mode 100644 index 0000000..2ab7451 --- /dev/null +++ b/man/as_argument.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model_tools.R +\name{as_argument} +\alias{as_argument} +\title{Build list of model meta-data} +\usage{ +as_argument(leadrboard) +} +\arguments{ +\item{leadrboard}{the leaderboard tibble, or a filtered verison of it +from \code{\link{board}}} +} +\value{ +a named list of model meta-data that can be based to a function +that builds a caret \code{train} model +} +\description{ +Extracts the model meta-data from \code{\link{board}} into a list. +This data can be used to exactly reproduce the model. Can be passed +to \code{\link{run}}. +} +\examples{ +parameters <- board() \%>\% + filter(id == 1) \%>\% + as_argument() + +run(modeler, data, parameters) + +} diff --git a/man/modeler.Rd b/man/modeler.Rd new file mode 100644 index 0000000..6144fc4 --- /dev/null +++ b/man/modeler.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model_tools.R +\name{modeler} +\alias{modeler} +\title{Wrapper around caret \code{train}} +\usage{ +modeler(data, method = "cv", num = 5, index = NULL, seeds = NA, model, + tune = NULL) +} +\value{ +a caret \code{train} object +} +\description{ +This is a wrapper around caret \code{train} that accepts the +model meta-data from \code{\link{board}}. This function is used +to exactly reproduce models in the leader board, or run +new models. +} +\details{ +The parameters correspond to the arguments in caret's +\code{trainControl} and \code{train} functions. + +The source code for this function is also an example how to +create your own modeler function for your own needs. + +Note: you must manually load the caret package to use this function. +caret is not a dependency of leadr. +} +\examples{ +library(caret) +library(leadr) +modeler(iris, model = "rf") + +} diff --git a/man/oof_grab.Rd b/man/oof_grab.Rd index 4163afa..fe393b0 100644 --- a/man/oof_grab.Rd +++ b/man/oof_grab.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/model_tools.R +% Please edit documentation in R/ensemble_tools.R \name{oof_grab} \alias{oof_grab} \title{Return out of fold model predictions} diff --git a/man/run.Rd b/man/run.Rd new file mode 100644 index 0000000..b7e2642 --- /dev/null +++ b/man/run.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model_tools.R +\name{run} +\alias{run} +\title{Run function taking the model meta-data} +\usage{ +run(modeler, data, parameters) +} +\arguments{ +\item{modeler}{user created function that is a wrapper around +caret \code{train}} + +\item{data}{training data for the caret \code{train} function} + +\item{parameters}{parameters that go into the modeler wrapper. If +a previous model is being rerun, use \code{\link{as_argument}} +to extract model meta-data from the \code{\link{board}}} +} +\value{ +returns the return value of the \code{modeler} function +supplied, which should be a caret \code{train} object. +} +\description{ +Runs a function that takes a data to train on and a +list of parameters. +} +\examples{ +parameters <- board() \%>\% + filter(id == 1) \%>\% + as_argument() + +run(modeler, data, parameters) + +} From 8d4d3b5202c56d678b1357ce069dfa0e759546f1 Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Sun, 22 Apr 2018 19:53:42 -0500 Subject: [PATCH 11/14] adds formula option to #21 --- R/model_tools.R | 14 ++++++++------ man/modeler.Rd | 6 +++--- man/run.Rd | 6 ++++-- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/R/model_tools.R b/R/model_tools.R index b51a497..dd1659b 100644 --- a/R/model_tools.R +++ b/R/model_tools.R @@ -84,6 +84,8 @@ as_argument <- function(leadrboard) { #' @param modeler user created function that is a wrapper around #' caret \code{train} #' +#' @param formula formula passed to caret \code{train} function +#' #' @param data training data for the caret \code{train} function #' #' @param parameters parameters that go into the modeler wrapper. If @@ -98,11 +100,11 @@ as_argument <- function(leadrboard) { #' filter(id == 1) %>% #' as_argument() #' -#' run(modeler, data, parameters) +#' run(modeler, Species ~ ., iris, parameters) #' #' @export -run <- function(modeler, data, parameters) { - do.call(modeler, c(list(data = data), parameters)) +run <- function(modeler, formula, data, parameters) { + do.call(modeler, c(list(formula = formula, data = data), parameters)) } #' Wrapper around caret \code{train} @@ -126,16 +128,16 @@ run <- function(modeler, data, parameters) { #' @examples #' library(caret) #' library(leadr) -#' modeler(iris, model = "rf") +#' modeler(Species ~ ., iris, model = "rf") #' #' @export -modeler <- function(data, method = "cv", num = 5, +modeler <- function(formula, data, method = "cv", num = 5, index = NULL, seeds = NA, model, tune = NULL) { control <- trainControl(method = method, number = num, savePredictions = 'final', index = index) train( - Species ~ ., + formula, data = data, method = model, trControl = control, diff --git a/man/modeler.Rd b/man/modeler.Rd index 6144fc4..3e283ae 100644 --- a/man/modeler.Rd +++ b/man/modeler.Rd @@ -4,8 +4,8 @@ \alias{modeler} \title{Wrapper around caret \code{train}} \usage{ -modeler(data, method = "cv", num = 5, index = NULL, seeds = NA, model, - tune = NULL) +modeler(formula, data, method = "cv", num = 5, index = NULL, seeds = NA, + model, tune = NULL) } \value{ a caret \code{train} object @@ -29,6 +29,6 @@ caret is not a dependency of leadr. \examples{ library(caret) library(leadr) -modeler(iris, model = "rf") +modeler(Species ~ ., iris, model = "rf") } diff --git a/man/run.Rd b/man/run.Rd index b7e2642..1c357a6 100644 --- a/man/run.Rd +++ b/man/run.Rd @@ -4,12 +4,14 @@ \alias{run} \title{Run function taking the model meta-data} \usage{ -run(modeler, data, parameters) +run(modeler, formula, data, parameters) } \arguments{ \item{modeler}{user created function that is a wrapper around caret \code{train}} +\item{formula}{formula passed to caret \code{train} function} + \item{data}{training data for the caret \code{train} function} \item{parameters}{parameters that go into the modeler wrapper. If @@ -29,6 +31,6 @@ parameters <- board() \%>\% filter(id == 1) \%>\% as_argument() -run(modeler, data, parameters) +run(modeler, Species ~ ., iris, parameters) } From cc206d2588532dc7b723ef9ded507f4d260654a3 Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Sun, 22 Apr 2018 19:56:17 -0500 Subject: [PATCH 12/14] reference to do.call in `run` documentation --- R/model_tools.R | 2 +- man/run.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/model_tools.R b/R/model_tools.R index dd1659b..56063e1 100644 --- a/R/model_tools.R +++ b/R/model_tools.R @@ -79,7 +79,7 @@ as_argument <- function(leadrboard) { #' Run function taking the model meta-data #' #' Runs a function that takes a data to train on and a -#' list of parameters. +#' list of parameters. This is a wrapper around \code{\link{do.call}}. #' #' @param modeler user created function that is a wrapper around #' caret \code{train} diff --git a/man/run.Rd b/man/run.Rd index 1c357a6..6e8953c 100644 --- a/man/run.Rd +++ b/man/run.Rd @@ -24,7 +24,7 @@ supplied, which should be a caret \code{train} object. } \description{ Runs a function that takes a data to train on and a -list of parameters. +list of parameters. This is a wrapper around \code{\link{do.call}}. } \examples{ parameters <- board() \%>\% From 5c813232f1d14acb0c31729e294d20221db477bc Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Tue, 8 May 2018 21:34:19 -0500 Subject: [PATCH 13/14] renamed `quiet` to `invisible`. quiet has a different connotation, and invisible is the name of the actual function --- R/board.R | 10 +++++----- man/board.Rd | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/board.R b/R/board.R index 8aaecbf..dafc170 100644 --- a/R/board.R +++ b/R/board.R @@ -19,9 +19,9 @@ #' is named \code{initial}. This is commonly used to group similar models. #' @param save whether \code{board} should save the supplied model to \code{dir}. If #' \code{FALSE} the model will not be saved, but will be added to the leaderboard. -#' @param quiet whether \code{board} should return the leaderboard tibble to the console. -#' By default \code{quiet = FALSE} means that the tibble prints to console. -#' \code{quiet = TRUE} is useful in a \code{.Rmd} environment, where you want to add +#' @param invisible whether \code{board} should return the leaderboard tibble to the console. +#' By default \code{invisible = FALSE} means that the tibble prints to console. +#' \code{invisible = TRUE} is useful in a \code{.Rmd} environment, where you want to add #' the model to the leaderboard without printing the tibble. #' #' @return \code{tibble} containing the most up-to-date leaderboard. @@ -43,7 +43,7 @@ #' @export board <- function( model = NULL, path = here::here("models"), dir = "initial", save = TRUE, - quiet = FALSE) { + invisible = FALSE) { leadrboard <- new_leadrboard() leadrboard_path <- here::here("leadrboard.RDS") @@ -64,7 +64,7 @@ board <- function( } } - if (quiet) return(invisible(leadrboard)) + if (invisible) return(base::invisible(leadrboard)) leadrboard$id <- id(leadrboard$id) leadrboard diff --git a/man/board.Rd b/man/board.Rd index ed74d96..f289a44 100644 --- a/man/board.Rd +++ b/man/board.Rd @@ -5,7 +5,7 @@ \title{A tibble leaderboard for \code{caret} \code{train} objects} \usage{ board(model = NULL, path = here::here("models"), dir = "initial", - save = TRUE, quiet = FALSE) + save = TRUE, invisible = FALSE) } \arguments{ \item{model}{model to add to the leaderboard. If no model is supplied @@ -24,9 +24,9 @@ is named \code{initial}. This is commonly used to group similar models.} \item{save}{whether \code{board} should save the supplied model to \code{dir}. If \code{FALSE} the model will not be saved, but will be added to the leaderboard.} -\item{quiet}{whether \code{board} should return the leaderboard tibble to the console. -By default \code{quiet = FALSE} means that the tibble prints to console. -\code{quiet = TRUE} is useful in a \code{.Rmd} environment, where you want to add +\item{invisible}{whether \code{board} should return the leaderboard tibble to the console. +By default \code{invisible = FALSE} means that the tibble prints to console. +\code{invisible = TRUE} is useful in a \code{.Rmd} environment, where you want to add the model to the leaderboard without printing the tibble.} } \value{ From a34125c14a19414307911c6d81b1d139dafb7dcb Mon Sep 17 00:00:00 2001 From: Timothy Mastny Date: Tue, 8 May 2018 22:09:45 -0500 Subject: [PATCH 14/14] leadrboard.RDS is always saved in the current working directory. closes #24 --- R/board.R | 21 ++++++++++++--------- R/peak.R | 2 +- man/board.Rd | 17 ++++++++++------- tests/testthat/test-aaa-directory.R | 2 +- tests/testthat/test-board.R | 8 ++++---- tests/testthat/test-zzz-clean-up.R | 8 ++++---- 6 files changed, 32 insertions(+), 26 deletions(-) diff --git a/R/board.R b/R/board.R index dafc170..be0b3b4 100644 --- a/R/board.R +++ b/R/board.R @@ -1,7 +1,9 @@ #' A tibble leaderboard for \code{caret} \code{train} objects #' #' This function updates and returns -#' the model leaderboard for the project. Please read the +#' the model leaderboard for the project. The function also saves a leaderboard +#' tibble in the current working directory and models to a subdirectory. +#' Please read the #' \href{https://github.com/tmastny/leadr}{README} and the #' introduction #' \href{https://tmastny.github.io/leadr/articles/introduction.html}{vignette}. @@ -10,9 +12,9 @@ #' @param model model to add to the leaderboard. If no model is supplied #' (the default \code{null}), \code{board} returns the leaderboard tibble #' for the project. -#' @param path the path to the saved model directory. By -#' default, the path is the project root. For best results, the path -#' string should be constructed with \code{file.path} or +#' @param path the path to the saved model directory. The default path +#' is to a folder called \code{models} in the current working directory. +#' For best results, the path string should be constructed with \code{file.path} or #' \href{https://github.com/krlmlr/here}{\code{here::here()}}. #' @param dir the name of directory where the model is saved. #' This will be a subdirectory of the specified path. The default directory @@ -24,11 +26,12 @@ #' \code{invisible = TRUE} is useful in a \code{.Rmd} environment, where you want to add #' the model to the leaderboard without printing the tibble. #' -#' @return \code{tibble} containing the most up-to-date leaderboard. +#' @return \code{tibble} containing the most up-to-date leaderboard. Also has the +#' side-effects of creating and/or saving \code{leadrboard.RDS} to the current +#' working directory, and saving the supplied model to the subdirectory. #' #' @examples #' # add caret model to leaderboard -#' # model saved to "file.path("models", "initial")" #' model <- train(...) #' board(model) #' @@ -42,11 +45,11 @@ #' @importFrom magrittr %>% #' @export board <- function( - model = NULL, path = here::here("models"), dir = "initial", save = TRUE, - invisible = FALSE) { + model = NULL, path = file.path(getwd(), "models"), dir = "initial", + save = TRUE, invisible = FALSE) { leadrboard <- new_leadrboard() - leadrboard_path <- here::here("leadrboard.RDS") + leadrboard_path <- file.path(getwd(), "leadrboard.RDS") if (file.exists(leadrboard_path)) leadrboard <- readRDS(leadrboard_path) diff --git a/R/peak.R b/R/peak.R index b360151..33b017c 100644 --- a/R/peak.R +++ b/R/peak.R @@ -101,7 +101,7 @@ return_pos <- function(models, id, window_size = 10, place) { #' @export at_last <- function(number = 1) { number <- 1:number - load_path <- file.path(here::here(), "leadrboard.RDS") + load_path <- file.path(getwd(), "leadrboard.RDS") nrow(readRDS(load_path)) - number + 1 } diff --git a/man/board.Rd b/man/board.Rd index f289a44..52d0389 100644 --- a/man/board.Rd +++ b/man/board.Rd @@ -4,7 +4,7 @@ \alias{board} \title{A tibble leaderboard for \code{caret} \code{train} objects} \usage{ -board(model = NULL, path = here::here("models"), dir = "initial", +board(model = NULL, path = file.path(getwd(), "models"), dir = "initial", save = TRUE, invisible = FALSE) } \arguments{ @@ -12,9 +12,9 @@ board(model = NULL, path = here::here("models"), dir = "initial", (the default \code{null}), \code{board} returns the leaderboard tibble for the project.} -\item{path}{the path to the saved model directory. By -default, the path is the project root. For best results, the path -string should be constructed with \code{file.path} or +\item{path}{the path to the saved model directory. The default path +is to a folder called \code{models} in the current working directory. +For best results, the path string should be constructed with \code{file.path} or \href{https://github.com/krlmlr/here}{\code{here::here()}}.} \item{dir}{the name of directory where the model is saved. @@ -30,18 +30,21 @@ By default \code{invisible = FALSE} means that the tibble prints to console. the model to the leaderboard without printing the tibble.} } \value{ -\code{tibble} containing the most up-to-date leaderboard. +\code{tibble} containing the most up-to-date leaderboard. Also has the +side-effects of creating and/or saving \code{leadrboard.RDS} to the current +working directory, and saving the supplied model to the subdirectory. } \description{ This function updates and returns -the model leaderboard for the project. Please read the +the model leaderboard for the project. The function also saves a leaderboard +tibble in the current working directory and models to a subdirectory. +Please read the \href{https://github.com/tmastny/leadr}{README} and the introduction \href{https://tmastny.github.io/leadr/articles/introduction.html}{vignette}. } \examples{ # add caret model to leaderboard -# model saved to "file.path("models", "initial")" model <- train(...) board(model) diff --git a/tests/testthat/test-aaa-directory.R b/tests/testthat/test-aaa-directory.R index 6e9915d..3e6d9df 100644 --- a/tests/testthat/test-aaa-directory.R +++ b/tests/testthat/test-aaa-directory.R @@ -22,7 +22,7 @@ test_that("board can save models to a different directory", { }) test_that("board can have model directory in non-root folder", { - not_root <- here::here("not_root_dir") + not_root <- file.path(getwd(), "not_root_dir") new_path <- file.path(not_root, "new_models") model <- train(Species ~ ., data = iris, method = 'rf') diff --git a/tests/testthat/test-board.R b/tests/testthat/test-board.R index 2dd57ba..9b5decb 100644 --- a/tests/testthat/test-board.R +++ b/tests/testthat/test-board.R @@ -5,8 +5,8 @@ library(caret) test_that("Model is saved to directory", { skip_if_not_installed('randomForest') - unlink(here::here("models_one"), recursive = TRUE) - unlink(here::here("leadrboard.RDS")) + unlink(file.path(getwd(), "models"), recursive = TRUE) + unlink(file.path(getwd(),"leadrboard.RDS")) control <- trainControl( method = "cv", @@ -25,7 +25,7 @@ test_that("Model is saved to directory", { board(model) - expect_true(file.exists(here::here("leadrboard.RDS"))) + expect_true(file.exists(file.path(getwd(), "leadrboard.RDS"))) path_dir <- board() %>% filter(id == at_last()) %>% @@ -57,7 +57,7 @@ test_that("Next model", { board(model) - expect_true(file.exists(here::here("leadrboard.RDS"))) + expect_true(file.exists(file.path(getwd(), "leadrboard.RDS"))) path_dir <- board() %>% filter(id == at_last()) %>% diff --git a/tests/testthat/test-zzz-clean-up.R b/tests/testthat/test-zzz-clean-up.R index 9a5bf0d..1930d48 100644 --- a/tests/testthat/test-zzz-clean-up.R +++ b/tests/testthat/test-zzz-clean-up.R @@ -3,11 +3,11 @@ context("clean-up") test_that("Clean-up successful", { #skip("save some models to work with") - unlink(here::here("models"), recursive = TRUE) - unlink(here::here("leadrboard.RDS")) + unlink(file.path(getwd(), "models"), recursive = TRUE) + unlink(file.path(getwd(), "leadrboard.RDS")) - expect_false(dir.exists(here::here("models"))) - expect_false(file.exists(here::here("leadrboard.RDS"))) + expect_false(dir.exists(file.path(getwd(), "models"))) + expect_false(file.exists(file.path(getwd(), "leadrboard.RDS"))) })