-
Notifications
You must be signed in to change notification settings - Fork 113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add ignore_step function #1324
base: main
Are you sure you want to change the base?
add ignore_step function #1324
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,71 @@ | ||||||||||||||||||
#' Remove steps from recipe | ||||||||||||||||||
#' | ||||||||||||||||||
#' `ignore_step` will return a recipe without steps specified by the `number` or | ||||||||||||||||||
#' `id` argument. | ||||||||||||||||||
#' | ||||||||||||||||||
#' @param x A `recipe` object. | ||||||||||||||||||
#' @param number An integer vector denoting the positions of the steps that | ||||||||||||||||||
#' should be removed. | ||||||||||||||||||
#' @param id A character string denoting the `id` of the steps that should be | ||||||||||||||||||
#' removed. | ||||||||||||||||||
Comment on lines
+7
to
+10
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
#' | ||||||||||||||||||
#' @details | ||||||||||||||||||
#' `number` or `id` must be specified. Specifying neither or both will result | ||||||||||||||||||
#' in an error. | ||||||||||||||||||
#' | ||||||||||||||||||
#' @return a `recipe` object. | ||||||||||||||||||
#' | ||||||||||||||||||
#' @examples | ||||||||||||||||||
#' rec <- recipe(mpg ~ ., data = mtcars) %>% | ||||||||||||||||||
#' step_dummy(all_nominal_predictors()) %>% | ||||||||||||||||||
#' step_impute_mean(all_numeric_predictors()) %>% | ||||||||||||||||||
#' step_normalize(all_numeric_predictors()) %>% | ||||||||||||||||||
#' step_pca(all_numeric_predictors(), id = "PCA") | ||||||||||||||||||
#' | ||||||||||||||||||
#' ignore_step(rec, number = 1) | ||||||||||||||||||
#' | ||||||||||||||||||
#' ignore_step(rec, number = 1:2) | ||||||||||||||||||
#' | ||||||||||||||||||
#' ignore_step(rec, id = "PCA") | ||||||||||||||||||
#' @export | ||||||||||||||||||
ignore_step <- function(x, number, id) {
  # Steps can only be removed from a recipe in which no step has been trained.
  if (any(map_lgl(x$steps, is_trained))) {
    cli::cli_abort(
      "{.arg x} must not contain any trained steps."
    )
  }

  n_steps <- length(x$steps)

  if (n_steps == 0) {
    cli::cli_abort("{.arg x} doesn't contain any steps to remove.")
  }

  # Errors unless exactly one of `number` / `id` was supplied, and returns the
  # name of the one that was.
  arg <- rlang::check_exclusive(number, id)

  if (arg == "number") {
    # Count `NA` as out of range so the condition below is never `NA`, which
    # would make `if ()` fail with an unrelated base R error.
    out_of_range <- is.na(number) | number < 1 | number > n_steps
    if (any(out_of_range)) {
      offenders <- number[out_of_range]
      cli::cli_abort(
        "{.arg number} must only contain values between 1 and {n_steps}. \\
        Not {offenders}."
      )
    }
  } else {
    step_ids <- vapply(x$steps, function(step) step$id, character(1))
    # `id` may name several steps; only the ids that are absent are reported.
    unknown <- id[!(id %in% step_ids)]
    if (length(unknown) > 0) {
      cli::cli_abort(
        "Supplied {.arg id} ({.val {unknown}}) not found in the recipe."
      )
    }
    number <- which(step_ids %in% id)
  }

  # A zero-length negative index selects nothing, i.e. `steps[-integer(0)]`
  # would drop every step, so only subset when there is something to remove.
  if (length(number) > 0) {
    x$steps <- x$steps[-number]
  }

  # Keep the `steps` element present but `NULL` instead of an empty list once
  # all steps are gone; single-bracket assignment of `list(NULL)` stores a
  # `NULL` without deleting the element.
  if (length(x$steps) == 0) {
    x["steps"] <- list(NULL)
  }

  x
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# ignore_step() errors when needed | ||
|
||
Code | ||
ignore_step(rec) | ||
Condition | ||
Error in `ignore_step()`: | ||
! `x` doesn't contain any steps to remove. | ||
|
||
--- | ||
|
||
Code | ||
ignore_step(rec1234) | ||
Condition | ||
Error in `ignore_step()`: | ||
! One of `number` or `id` must be supplied. | ||
|
||
--- | ||
|
||
Code | ||
ignore_step(rec1234, number = 1, id = "pca") | ||
Condition | ||
Error in `ignore_step()`: | ||
! Exactly one of `number` or `id` must be supplied. | ||
|
||
--- | ||
|
||
Code | ||
ignore_step(rec1234, number = 0) | ||
Condition | ||
Error in `ignore_step()`: | ||
! `number` must only contain values between 1 and 4. Not 0. | ||
|
||
--- | ||
|
||
Code | ||
ignore_step(rec1234, number = 10) | ||
Condition | ||
Error in `ignore_step()`: | ||
! `number` must only contain values between 1 and 4. Not 10. | ||
|
||
--- | ||
|
||
Code | ||
ignore_step(rec1234, id = "no id") | ||
Condition | ||
Error in `ignore_step()`: | ||
! Supplied `id` ("no id") not found in the recipe. | ||
|
||
--- | ||
|
||
Code | ||
ignore_step(rec12) | ||
Condition | ||
Error in `ignore_step()`: | ||
! `x` must not contain any trained steps. | ||
|
||
--- | ||
|
||
Code | ||
ignore_step(rec1234) | ||
Condition | ||
Error in `ignore_step()`: | ||
! `x` must not contain any trained steps. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
test_that("ignore_step() works correctly", {
  # Reference recipes: the digits in each name list the steps it contains,
  # numbered by their position in `rec1234`.
  rec <- recipe(mpg ~ ., data = mtcars)

  rec1234 <- recipe(mpg ~ ., data = mtcars) %>%
    step_dummy(all_nominal_predictors(), id = "dummy") %>%
    step_impute_mean(all_numeric_predictors(), id = "impute_mean") %>%
    step_normalize(all_numeric_predictors(), id = "normalize") %>%
    step_pca(all_numeric_predictors(), id = "pca")

  rec234 <- recipe(mpg ~ ., data = mtcars) %>%
    step_impute_mean(all_numeric_predictors(), id = "impute_mean") %>%
    step_normalize(all_numeric_predictors(), id = "normalize") %>%
    step_pca(all_numeric_predictors(), id = "pca")

  rec34 <- recipe(mpg ~ ., data = mtcars) %>%
    step_normalize(all_numeric_predictors(), id = "normalize") %>%
    step_pca(all_numeric_predictors(), id = "pca")

  rec123 <- recipe(mpg ~ ., data = mtcars) %>%
    step_dummy(all_nominal_predictors(), id = "dummy") %>%
    step_impute_mean(all_numeric_predictors(), id = "impute_mean") %>%
    step_normalize(all_numeric_predictors(), id = "normalize")

  # Removal by position.
  expect_equal(ignore_step(rec1234, number = 1), rec234, ignore_attr = TRUE)
  expect_equal(ignore_step(rec1234, number = 1:2), rec34, ignore_attr = TRUE)

  # Removing every step must be equivalent to a recipe that never had any.
  expect_equal(ignore_step(rec1234, number = 1:4), rec, ignore_attr = TRUE)

  # Removal by id, for both the first and the last step.
  expect_equal(ignore_step(rec1234, id = "dummy"), rec234, ignore_attr = TRUE)
  expect_equal(ignore_step(rec1234, id = "pca"), rec123, ignore_attr = TRUE)
})
|
||
test_that("ignore_step() errors when needed", {
  # Variable names and the calls inside expect_snapshot() are kept verbatim
  # because they are echoed into the recorded snapshot.
  rec <- recipe(mpg ~ ., data = mtcars)

  rec1234 <- rec %>%
    step_dummy(all_nominal_predictors(), id = "dummy") %>%
    step_impute_mean(all_numeric_predictors(), id = "impute_mean") %>%
    step_normalize(all_numeric_predictors(), id = "normalize") %>%
    step_pca(all_numeric_predictors(), id = "pca")

  # Recipe without any steps.
  expect_snapshot(ignore_step(rec), error = TRUE)

  # Neither `number` nor `id` supplied, then both supplied.
  expect_snapshot(ignore_step(rec1234), error = TRUE)
  expect_snapshot(ignore_step(rec1234, number = 1, id = "pca"), error = TRUE)

  # `number` below and above the valid range.
  expect_snapshot(ignore_step(rec1234, number = 0), error = TRUE)
  expect_snapshot(ignore_step(rec1234, number = 10), error = TRUE)

  # `id` that matches no step.
  expect_snapshot(ignore_step(rec1234, id = "no id"), error = TRUE)
})
|
||
test_that("ignore_step() errors when needed", {
  # Variable names and the calls inside expect_snapshot() are kept verbatim
  # because they are echoed into the recorded snapshot.

  # Two-step recipe that has been run through prep().
  rec12 <- prep(
    recipe(mpg ~ ., data = mtcars) %>%
      step_dummy(all_nominal_predictors(), id = "dummy") %>%
      step_impute_mean(all_numeric_predictors(), id = "impute_mean")
  )

  # Two further steps added on top of the already prepped recipe.
  rec1234 <- rec12 %>%
    step_normalize(all_numeric_predictors(), id = "normalize") %>%
    step_pca(all_numeric_predictors(), id = "pca")

  expect_snapshot(ignore_step(rec12), error = TRUE)
  expect_snapshot(ignore_step(rec1234), error = TRUE)
})
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.