From 5f6ce3b5d5f30180a4d6841fd5ce9cdf9990b291 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 29 May 2024 16:20:58 -0700 Subject: [PATCH 1/2] add ignore_step function --- NEWS.md | 2 + R/ignore_step.R | 64 ++++++++++++++++ _pkgdown.yml | 1 + man/ignore_step.Rd | 37 +++++++++ tests/testthat/_snaps/ignore_step.md | 64 ++++++++++++++++ tests/testthat/test-ignore_step.R | 108 +++++++++++++++++++++++++++ 6 files changed, 276 insertions(+) create mode 100644 R/ignore_step.R create mode 100644 man/ignore_step.Rd create mode 100644 tests/testthat/_snaps/ignore_step.md create mode 100644 tests/testthat/test-ignore_step.R diff --git a/NEWS.md b/NEWS.md index c87c4ea95..a193685ce 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,6 +20,8 @@ * `step_mutate()` gained `.pkgs` argument to specify what packages need to be loaded for step to work. (#1282) +* Added `ignore_step()` to modify untrained recipes by removing steps from them. (#887) + * Added more documentation in `?selections` about how `tidyselect::everything()` works in recipes. (#1259) * Improved error message for misspelled argument in step functions. (#1318) diff --git a/R/ignore_step.R b/R/ignore_step.R new file mode 100644 index 000000000..162018b48 --- /dev/null +++ b/R/ignore_step.R @@ -0,0 +1,64 @@ +#' Remove steps from recipe +#' +#' `ignore_step` will return a recipe without steps specified by the `number` or +#' `id` argument. + +#' @param x A `recipe` object. +#' @param number An integer vector, Denoting the positions of the steps that +#' should be removed. +#' @param id A character string denoting the `id` of the steps that should be +#' removed. +#' +#' @details +#' `number` or `id` must be specified. Specifying neither or both will result +#' in an error. +#' +#' @return a `recipe` object. 
+#' +#' @examplesIf rlang::is_installed("modeldata") +#' rec <- recipe(mpg ~ ., data = mtcars) %>% +#' step_dummy(all_nominal_predictors()) %>% +#' step_impute_mean(all_numeric_predictors()) %>% +#' step_normalize(all_numeric_predictors()) %>% +#' step_pca(all_numeric_predictors(), id = "PCA") +ignore_step <- function(x, number, id) { + if (any(map_lgl(x$steps, is_trained))) { + cli::cli_abort( + "{.arg x} must not contain any trained steps." + ) + } + + n_steps <- length(x$steps) + + if (n_steps == 0) { + cli::cli_abort("{.arg x} doesn't contain any steps to remove.") + } + + arg <- rlang::check_exclusive(number, id) + + if (arg == "number") { + if (any(number < 1 | number > n_steps)) { + offenders <- number[number < 1 | number > n_steps] + cli::cli_abort( + "{.arg number} must only contain values between 1 and {n_steps}. \\ + Not {offenders}." + ) + } + } else { + step_ids <- vapply(x$steps, function(x) x$id, character(1)) + if (!(id %in% step_ids)) { + cli::cli_abort( + "Supplied {.arg id} ({.val {id}}) not found in the recipe." 
+ ) + } + number <- which(id == step_ids) + } + + x$steps <- x$steps[-number] + + if (length(x$steps) == 0) { + x["steps"] <- list(NULL) + } + + x +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 9713e872b..0945f4360 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -47,6 +47,7 @@ reference: - update_role_requirements - get_case_weights - case_weights + - ignore_step - title: Step Functions - Imputation contents: - starts_with("step_impute_") diff --git a/man/ignore_step.Rd b/man/ignore_step.Rd new file mode 100644 index 000000000..07b158a0b --- /dev/null +++ b/man/ignore_step.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ignore_step.R +\name{ignore_step} +\alias{ignore_step} +\title{Remove steps from recipe} +\usage{ +ignore_step(x, number, id) +} +\arguments{ +\item{x}{A \code{recipe} object.} + +\item{number}{An integer vector, Denoting the positions of the steps that +should be removed.} + +\item{id}{A character string. Denoting the \code{id} of the steps that should be +removed.} +} +\value{ +a \code{recipe} object. +} +\description{ +\code{ignore_step} will return a recipe without steps specified by the \code{number} or +\code{id} argument. +} +\details{ +\code{number} or \code{id} must be specified. Specifying neither or both will result +in a error. 
+} +\examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +rec <- recipe(mpg ~ ., data = mtcars) \%>\% + step_dummy(all_nominal_predictors()) \%>\% + step_impute_mean(all_numeric_predictors()) \%>\% + step_normalize(all_numeric_predictors()) \%>\% + step_pca(all_numeric_predictors(), id = "PCA") +\dontshow{\}) # examplesIf} +} diff --git a/tests/testthat/_snaps/ignore_step.md b/tests/testthat/_snaps/ignore_step.md new file mode 100644 index 000000000..87c113c44 --- /dev/null +++ b/tests/testthat/_snaps/ignore_step.md @@ -0,0 +1,64 @@ +# ignore_step() errors when needed + + Code + ignore_step(rec) + Condition + Error in `ignore_step()`: + ! `x` doesn't contain any steps to remove. + +--- + + Code + ignore_step(rec1234) + Condition + Error in `ignore_step()`: + ! One of `number` or `id` must be supplied. + +--- + + Code + ignore_step(rec1234, number = 1, id = "pca") + Condition + Error in `ignore_step()`: + ! Exactly one of `number` or `id` must be supplied. + +--- + + Code + ignore_step(rec1234, number = 0) + Condition + Error in `ignore_step()`: + ! `number` must only contain values between 1 and 4. Not 0. + +--- + + Code + ignore_step(rec1234, number = 10) + Condition + Error in `ignore_step()`: + ! `number` must only contain values between 1 and 4. Not 10. + +--- + + Code + ignore_step(rec1234, id = "no id") + Condition + Error in `ignore_step()`: + ! Supplied `id` ("no id") not found in the recipe. + +--- + + Code + ignore_step(rec12) + Condition + Error in `ignore_step()`: + ! `x` must not contain any trained steps. + +--- + + Code + ignore_step(rec1234) + Condition + Error in `ignore_step()`: + ! `x` must not contain any trained steps. 
+ diff --git a/tests/testthat/test-ignore_step.R b/tests/testthat/test-ignore_step.R new file mode 100644 index 000000000..f56d9bbb0 --- /dev/null +++ b/tests/testthat/test-ignore_step.R @@ -0,0 +1,108 @@ +test_that("ignore_step() work correctly", { + rec <- recipe(mpg ~ ., data = mtcars) + + rec1234 <- recipe(mpg ~ ., data = mtcars) %>% + step_dummy(all_nominal_predictors(), id = "dummy") %>% + step_impute_mean(all_numeric_predictors(), id = "impute_mean") %>% + step_normalize(all_numeric_predictors(), id = "normalize") %>% + step_pca(all_numeric_predictors(), id = "pca") + + rec234 <- recipe(mpg ~ ., data = mtcars) %>% + step_impute_mean(all_numeric_predictors(), id = "impute_mean") %>% + step_normalize(all_numeric_predictors(), id = "normalize") %>% + step_pca(all_numeric_predictors(), id = "pca") + + rec34 <- recipe(mpg ~ ., data = mtcars) %>% + step_normalize(all_numeric_predictors(), id = "normalize") %>% + step_pca(all_numeric_predictors(), id = "pca") + + rec123 <- recipe(mpg ~ ., data = mtcars) %>% + step_dummy(all_nominal_predictors(), id = "dummy") %>% + step_impute_mean(all_numeric_predictors(), id = "impute_mean") %>% + step_normalize(all_numeric_predictors(), id = "normalize") + + expect_equal( + ignore_attr = TRUE, + ignore_step(rec1234, number = 1), + rec234 + ) + + expect_equal( + ignore_attr = TRUE, + ignore_step(rec1234, number = 1:2), + rec34 + ) + + expect_equal( + ignore_attr = TRUE, + ignore_step(rec1234, number = 1:4), + rec + ) + + expect_equal( + ignore_attr = TRUE, + ignore_step(rec1234, number = 1), + rec234 + ) + + expect_equal( + ignore_attr = TRUE, + ignore_step(rec1234, id = "pca"), + rec123 + ) +}) + +test_that("ignore_step() errors when needed", { + rec <- recipe(mpg ~ ., data = mtcars) + + rec1234 <- recipe(mpg ~ ., data = mtcars) %>% + step_dummy(all_nominal_predictors(), id = "dummy") %>% + step_impute_mean(all_numeric_predictors(), id = "impute_mean") %>% + step_normalize(all_numeric_predictors(), id = "normalize") %>% + 
step_pca(all_numeric_predictors(), id = "pca") + + expect_snapshot( + error = TRUE, + ignore_step(rec) + ) + expect_snapshot( + error = TRUE, + ignore_step(rec1234) + ) + expect_snapshot( + error = TRUE, + ignore_step(rec1234, number = 1, id = "pca") + ) + expect_snapshot( + error = TRUE, + ignore_step(rec1234, number = 0) + ) + expect_snapshot( + error = TRUE, + ignore_step(rec1234, number = 10) + ) + expect_snapshot( + error = TRUE, + ignore_step(rec1234, id = "no id") + ) +}) + +test_that("ignore_step() errors when needed", { + rec12 <- recipe(mpg ~ ., data = mtcars) %>% + step_dummy(all_nominal_predictors(), id = "dummy") %>% + step_impute_mean(all_numeric_predictors(), id = "impute_mean") %>% + prep() + + rec1234 <- rec12 %>% + step_normalize(all_numeric_predictors(), id = "normalize") %>% + step_pca(all_numeric_predictors(), id = "pca") + + expect_snapshot( + error = TRUE, + ignore_step(rec12) + ) + expect_snapshot( + error = TRUE, + ignore_step(rec1234) + ) +}) From e333556c2d0c5ca8ae5500fd3c9550a5a897ae36 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 29 May 2024 16:32:59 -0700 Subject: [PATCH 2/2] finish up documentation for ignore_step --- NAMESPACE | 1 + R/ignore_step.R | 11 +++++++++-- man/ignore_step.Rd | 8 ++++++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 426a10ff3..273bf215f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -582,6 +582,7 @@ export(get_case_weights) export(get_keep_original_cols) export(has_role) export(has_type) +export(ignore_step) export(imp_vars) export(importance_weights) export(is_trained) diff --git a/R/ignore_step.R b/R/ignore_step.R index 162018b48..53202c683 100644 --- a/R/ignore_step.R +++ b/R/ignore_step.R @@ -2,7 +2,7 @@ #' #' `ignore_step` will return a recipe without steps specified by the `number` or #' `id` argument. - +#' #' @param x A `recipe` object. #' @param number An integer vector, Denoting the positions of the steps that #' should be removed. 
@@ -15,12 +15,19 @@ #' #' @return a `recipe` object. #' -#' @examplesIf rlang::is_installed("modeldata") +#' @examples #' rec <- recipe(mpg ~ ., data = mtcars) %>% #' step_dummy(all_nominal_predictors()) %>% #' step_impute_mean(all_numeric_predictors()) %>% #' step_normalize(all_numeric_predictors()) %>% #' step_pca(all_numeric_predictors(), id = "PCA") +#' +#' ignore_step(rec, number = 1) +#' +#' ignore_step(rec, number = 1:2) +#' +#' ignore_step(rec, id = "PCA") +#' @export ignore_step <- function(x, number, id) { if (any(map_lgl(x$steps, is_trained))) { cli::cli_abort( diff --git a/man/ignore_step.Rd b/man/ignore_step.Rd index 07b158a0b..566e8ef35 100644 --- a/man/ignore_step.Rd +++ b/man/ignore_step.Rd @@ -27,11 +27,15 @@ a \code{recipe} object. in a error. } \examples{ -\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} rec <- recipe(mpg ~ ., data = mtcars) \%>\% step_dummy(all_nominal_predictors()) \%>\% step_impute_mean(all_numeric_predictors()) \%>\% step_normalize(all_numeric_predictors()) \%>\% step_pca(all_numeric_predictors(), id = "PCA") -\dontshow{\}) # examplesIf} + +ignore_step(rec, number = 1) + +ignore_step(rec, number = 1:2) + +ignore_step(rec, id = "PCA") }