From c29e57ca9f1ea004fd0502eba91c77971019ca80 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Fri, 29 Nov 2024 00:36:36 +0100 Subject: [PATCH 01/17] init new param WIP --- R/PipeOpColRoles.R | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index e5e56cc16..f3b0c9e95 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -28,13 +28,15 @@ #' #' @section Parameters: #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: -#' * `new_role` :: `list`\cr +#' * `new_role` :: named `list`\cr #' Named list of new column roles. The names must match the column names of the input task that #' will later be trained/predicted on. Each entry of the list must contain a character vector with #' possible values of [`mlr_reflections$task_col_roles`][mlr3::mlr_reflections]. If the value is #' given as `character()`, the column will be dropped from the input task. Changing the role of a #' column results in this column loosing its previous role(s). Setting a new target variable or #' changing the role of an existing target variable is not supported. +#' * `new_role_direct` :: named `list`\cr# +#' a #' #' @section Methods: #' Only methods inherited from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. @@ -48,6 +50,11 @@ #' )) #' #' pop$train(list(task)) +#' +#' pop$param_set$set_values(new_role_direct = list( +#' +#' )) +#' #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOpTaskPreproc.R @@ -71,6 +78,17 @@ PipeOpColRoles = R6Class("PipeOpColRoles", all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) check_subset(unlist(x), all_col_roles[all_col_roles != "target"]) }, .parent = topenv()) + ), + new_role_direct = p_uty( + tags = c("train", "predict"), + custom_check = crate(function(x) { + first_check = check_list(x, types = "character", any.missing = FALSE, min.len = 1L, names = "named") + # return the error directly if this failed + if (is.character(first_check)) { + return(first_check) + } + + }, .parent = topenv()) ) ) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE) From 5a00c9b440a0d41e54eb5f11f2b9384aaa3b8c32 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 1 Dec 2024 23:32:08 +0100 Subject: [PATCH 02/17] added new param new_role_direct as inverse of new_role + simplified code a bit --- R/PipeOpColRoles.R | 74 +++++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index f3b0c9e95..af0354b62 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -64,30 +64,30 @@ PipeOpColRoles = R6Class("PipeOpColRoles", public = list( initialize = function(id = "colroles", param_vals = list()) { ps = ps( - # named list, each entry with a vector of roles + # named list, each entry with a vector of roles, names are columns new_role = p_uty( tags = c("train", "predict"), custom_check = crate(function(x) { first_check = check_list(x, types = "character", any.missing = FALSE, min.len = 1L, names = "named") - # return the error directly if this failed - if (is.character(first_check)) { - return(first_check) - } - # changing anything target related is not supported - # a value of "character()" will lead to the column being dropped + # Return the error directly if this failed + if (is.character(first_check)) return(first_check) + # Changing anything target related is not supported. + # A value of "character()" is accepted. all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) check_subset(unlist(x), all_col_roles[all_col_roles != "target"]) }, .parent = topenv()) ), + # named list, each with a vector of columns, names are column roles new_role_direct = p_uty( tags = c("train", "predict"), custom_check = crate(function(x) { - first_check = check_list(x, types = "character", any.missing = FALSE, min.len = 1L, names = "named") - # return the error directly if this failed - if (is.character(first_check)) { - return(first_check) - } - + first_check = check_list(x, types = "character", any.missing = FALSE, min.len = 1L, unique = TRUE, names = "named", null.ok = TRUE) + # Return the error directly if this failed + if (is.character(first_check)) return(first_check) + # Changing anything target related is not supported. + # A value of "character()" is accepted. + all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) + check_subset(names(x), all_col_roles[all_col_roles != "target"]) }, .parent = topenv()) ) ) @@ -96,31 +96,45 @@ PipeOpColRoles = R6Class("PipeOpColRoles", ), private = list( .transform = function(task) { - new_role = self$param_set$values$new_role + new_role = self$param_set$values[["new_role"]] + new_role_direct = self$param_set$values[["new_role_direct"]] - if (is.null(new_role)) { + if (is.null(new_role) && is.null(new_role_direct)) { return(task) # early exit } + if (!is.null(new_role) && !is.null(new_role_direct)) { + stop("Both parameters, 'new_role' and 'new_role_direct', are set. Provide only one parameter at a time.") + } - new_role_names = names(new_role) - ids = task$col_info$id - ids = ids[ids != "..row_ids"] - # names of "new_role" must be a subset of the column names of the task - assert_subset(new_role_names, choices = ids, empty.ok = FALSE) + # Get the columns for which we change roles + # Replace NULLs with character(0) + if (!is.null(new_role)) { + new_role_cols = names(new_role) + } else if (!is.null(new_role_direct)) { + new_role_cols = unique(unlist(new_role_direct)) + # Replace NULL with empty string, to allow option with NULL (might throw this out) + # would make sense to add NULL to new_role as well if we keep it + new_role_direct <- lapply(new_role_direct, function(x) if (is.null(x)) character(0) else x) + } - # changing the role of a target is not supported - if (any(task$col_roles$target %in% new_role_names)) { - stopf("Cannot change the role of a target.") + # Changing the role of a target is not supported + if (any(task$col_roles$target %in% new_role_cols)) { + stop("Cannot change the role of a target.") } - # drop (all) old role(s) - task$col_roles = map(task$col_roles, .f = function(x) x[x %nin% new_role_names]) + if (!is.null(new_role)) { + # Drop (all) column(s) for which we change the role + task$col_roles = map(task$col_roles, .f = function(x) x[x %nin% new_role_cols]) - # add the new role(s) - all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) - for(role in all_col_roles) { - task$col_roles[[role]] = union(task$col_roles[[role]], - y = names(which(unlist(map(new_role, .f = function(x) role %in% x))))) + # Add new role(s) for column(s) for which we change the role + all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) + for(role in all_col_roles) { + task$col_roles[[role]] = union(task$col_roles[[role]], + names(which(unlist(map(new_role, .f = function(x) role %in% x))))) + } + } else if (!is.null(new_role_direct)) { + # Update roles to match new_role_direct + task$col_roles[names(new_role_direct)] = lapply(names(new_role_direct), function(role) new_role_direct[[role]]) } task From ede696ff79965d39c1d50272bc96fd09799b369b Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 1 Dec 2024 23:32:23 +0100 Subject: [PATCH 03/17] tests for new param new_role_direct --- tests/testthat/test_pipeop_colroles.R | 79 ++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_pipeop_colroles.R b/tests/testthat/test_pipeop_colroles.R index 500f5f0b3..deb604e2a 100644 --- a/tests/testthat/test_pipeop_colroles.R +++ b/tests/testthat/test_pipeop_colroles.R @@ -1,47 +1,112 @@ context("PipeOpColRoles") test_that("PipeOpColRoles - basic properties", { + op = PipeOpColRoles$new() task = mlr_tasks$get("iris") + expect_pipeop(op) expect_equal(task, train_pipeop(op, inputs = list(task))$output) expect_equal(task, predict_pipeop(op, inputs = list(task))$output) expect_datapreproc_pipeop_class(PipeOpColRoles, task = task) + }) -test_that("PipeOpColRoles - assertion works", { +test_that("PipeOpColRoles - only correct roles are accepted", { + expect_error(PipeOpColRoles$new(param_vals = list(new_role = "wrong")), regexp = "list") expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "wrong", b = NA))), regexp = "character") - expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "wrong", b = "target"))), regexp = "subset") + expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "wrong"))), regexp = "subset") + expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "target"))), regexp = "subset") + + expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = "wrong")), regexp = "list") + expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(wrong = "x", feature = NA))), regexp = "character") + expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(wrong = "x"))), regexp = "subset") + expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(target = "y"))), regexp = "subset") + + # test that no duplicates for group, name, weight? (error during init) + + # test that roles are correct for task types? (error during training) + }) -test_that("PipeOpColRoles - name checking works", { +test_that("PipeOpColRoles - only existing columns are accepted", { + task = mlr_tasks$get("iris") task$cbind(data.table(rn = sprintf("%03d", 1:150))) + op = PipeOpColRoles$new(param_vals = list(new_role = list(rn = "name", wrong = "feature"))) expect_error(train_pipeop(op, inputs = list(task)), regexp = "subset") + + op = PipeOpColRoles$new(param_vals = list(new_role_direct = list(name = "rn", feature = "wrong"))) + expect_error(train_pipeop(op, inputs = list(task)), regexp = "subset") + }) test_that("PipeOpColRoles - changing the role of a target fails", { + task = mlr_tasks$get("iris") + op = PipeOpColRoles$new(param_vals = list(new_role = list(Species = "feature"))) expect_error(train_pipeop(op, inputs = list(task)), regexp = "role of a target") + + op = PipeOpColRoles$new(param_vals = list(new_role_direct = list(feature = "Species"))) + expect_error(train_pipeop(op, inputs = list(task)), regexp = "role of a target") + }) -test_that("PipeOpColRoles - functionality works", { +test_that("PipeOpColRoles - new_role works", { + task = mlr_tasks$get("iris") task$cbind(data.table(rn = sprintf("%03d", 1:150))) + op = PipeOpColRoles$new(param_vals = list(new_role = list(rn = "name", Petal.Length = "order", Petal.Width = character(0)))) - train_out = train_pipeop(op, inputs = list(task))$output + + train_out = train_pipeop(op, inputs = list(task))[[1L]] + col_roles_actual = train_out$col_roles col_roles_expected = list( feature = c("Sepal.Length", "Sepal.Width"), target = "Species", name = "rn", - order = "Petal.Length", stratum = character(0), group = character(0), weight = character(0)) + order = "Petal.Length", stratum = character(0), group = character(0), weight = character(0) + ) + + # this does nothing? if ("weights_learner" %in% names(task)) names(col_roles_expected)[names(col_roles_expected) == "weight"] = "weights_learner" + expect_equal(train_out$col_roles[names(col_roles_expected)], col_roles_expected) expect_equal(train_out$row_names$row_name, task$data(cols = "rn")[[1L]]) expect_true("Petal.Width" %nin% colnames(train_out$data())) - predict_out = predict_pipeop(op, inputs = list(task))$output + + predict_out = predict_pipeop(op, inputs = list(task))[[1L]] expect_equal(train_out, predict_out) }) + +test_that("PipeOpColRoles - new_role_direct works", { + + task = mlr_tasks$get("iris") + task$cbind(data.table(rn = sprintf("%03d", 1:150))) + + op = PipeOpColRoles$new(param_vals = list(new_role_direct = list( + name = "rn", order = "Petal.Length", feature = character(0)))) + + train_out = train_pipeop(op, inputs = list(task))[[1L]] + + col_roles_actual = train_out$col_roles + col_roles_expected = list( + feature = character(0), target = "Species", name = "rn", + order = "Petal.Length", stratum = character(0), group = character(0), weight = character(0) + ) + + # ask whether necessary + # if ("weights_learner" %in% names(task)) names(col_roles_expected)[names(col_roles_expected) == "weight"] = "weights_learner" + + expect_equal(train_out$col_roles[names(col_roles_expected)], col_roles_expected) + expect_equal(train_out$row_names$row_name, task$data(cols = "rn")[[1L]]) + expect_equal(train_out$data(), task$data(cols = "Species")) + + predict_out = predict_pipeop(op, inputs = list(task))[[1L]] + expect_equal(train_out, predict_out) +}) + +# if we keep behavior with NULL, add tests From 2d006ff3842935f15e9ae250b5a5fc7eccf7fe02 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 1 Dec 2024 23:47:51 +0100 Subject: [PATCH 04/17] docs + ran document --- R/PipeOpColRoles.R | 36 +++++++++++++++++++++------------- man/mlr_pipeops_colroles.Rd | 39 ++++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index af0354b62..a0fdf4962 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -5,7 +5,9 @@ #' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. #' #' @description -#' Changes the column roles of the input [`Task`][mlr3::Task] according to `new_role`. +#' Changes the column roles of the input [`Task`][mlr3::Task] according to `new_role` or its inverse `new_role_direct`. +#' +#' Setting a new target variable or changing the role of an existing target variable is not supported. #' #' @section Construction: #' ``` @@ -21,7 +23,7 @@ #' @section Input and Output Channels: #' Input and output channels are inherited from [`PipeOpTaskPreproc`]. #' -#' The output is the input [`Task`][mlr3::Task] with transformed column roles according to `new_role`. +#' The output is the input [`Task`][mlr3::Task] with transformed column roles according to `new_role` or its inverse `new_role_direct`. #' #' @section State: #' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`]. @@ -29,14 +31,17 @@ #' @section Parameters: #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: #' * `new_role` :: named `list`\cr -#' Named list of new column roles. The names must match the column names of the input task that +#' Named list of new column roles by column. The names must match the column names of the input task that #' will later be trained/predicted on. Each entry of the list must contain a character vector with -#' possible values of [`mlr_reflections$task_col_roles`][mlr3::mlr_reflections]. If the value is -#' given as `character()`, the column will be dropped from the input task. Changing the role of a -#' column results in this column loosing its previous role(s). Setting a new target variable or -#' changing the role of an existing target variable is not supported. +#' possible values of [`mlr_reflections$task_col_roles`][mlr3::mlr_reflections]. +#' If the value is given as `character()`, the column will be dropped from the input task. Changing the role +#' of a column results in this column loosing its previous role(s). #' * `new_role_direct` :: named `list`\cr# -#' a +#' Named list of new column roles by role. The names must match the possible column roles, i.e. values of +#' [`mlr_reflections$task_col_roles`][mlr3::mlr_reflections]. Each entry of the list must contain a character +#' vector with column names of the input task that will later be trained/predicted on. +#' If the value is given as `character()` or `NULL`, all columns will be dropped from the role given in the element +#' name. The value given for a role overwrites the previous entry in `task$col_roles` for that role, completely. #' #' @section Methods: #' Only methods inherited from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. @@ -44,16 +49,21 @@ #' @examples #' library("mlr3") #' -#' task = tsk("boston_housing") +#' task = tsk("penguins") #' pop = po("colroles", param_vals = list( -#' new_role = list(town = c("order", "feature")) +#' new_role = list(body_mass = c("order", "feature")) #' )) #' -#' pop$train(list(task)) +#' train_out1 = pop$train(list(task)) +#' train_out1$col_roles #' -#' pop$param_set$set_values(new_role_direct = list( +#' pop$param_set$set_values( +#' new_role = NULL, +#' new_role_direct = list(order = character(), group = "island") +#' ) #' -#' )) +#' train_out2 = pop$train(list(train_out)) +#' train_out2$col_roles #' #' @family PipeOps #' @template seealso_pipeopslist diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index fa58f1ed5..679a2ca1a 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -8,7 +8,9 @@ \code{\link[R6:R6Class]{R6Class}} object inheriting from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. } \description{ -Changes the column roles of the input \code{\link[mlr3:Task]{Task}} according to \code{new_role}. +Changes the column roles of the input \code{\link[mlr3:Task]{Task}} according to \code{new_role} or its inverse \code{new_role_direct}. + +Setting a new target variable or changing the role of an existing target variable is not supported. } \section{Construction}{ @@ -28,7 +30,7 @@ be set during construction. Default \code{list()}. Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}. -The output is the input \code{\link[mlr3:Task]{Task}} with transformed column roles according to \code{new_role}. +The output is the input \code{\link[mlr3:Task]{Task}} with transformed column roles according to \code{new_role} or its inverse \code{new_role_direct}. } \section{State}{ @@ -40,13 +42,18 @@ The \verb{$state} is a named \code{list} with the \verb{$state} elements inherit The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as: \itemize{ -\item \code{new_role} :: \code{list}\cr -Named list of new column roles. The names must match the column names of the input task that +\item \code{new_role} :: named \code{list}\cr +Named list of new column roles by column. The names must match the column names of the input task that will later be trained/predicted on. Each entry of the list must contain a character vector with -possible values of \code{\link[mlr3:mlr_reflections]{mlr_reflections$task_col_roles}}. If the value is -given as \code{character()}, the column will be dropped from the input task. Changing the role of a -column results in this column loosing its previous role(s). Setting a new target variable or -changing the role of an existing target variable is not supported. +possible values of \code{\link[mlr3:mlr_reflections]{mlr_reflections$task_col_roles}}. +If the value is given as \code{character()}, the column will be dropped from the input task. Changing the role +of a column results in this column loosing its previous role(s). +\item \code{new_role_direct} :: named \code{list}\cr# +Named list of new column roles by role. The names must match the possible column roles, i.e. values of +\code{\link[mlr3:mlr_reflections]{mlr_reflections$task_col_roles}}. Each entry of the list must contain a character +vector with column names of the input task that will later be trained/predicted on. +If the value is given as \code{character()} or \code{NULL}, all columns will be dropped from the role given in the element +name. The value given for a role overwrites the previous entry in \code{task$col_roles} for that role, completely. } } @@ -58,12 +65,22 @@ Only methods inherited from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{Pi \examples{ library("mlr3") -task = tsk("boston_housing") +task = tsk("penguins") pop = po("colroles", param_vals = list( - new_role = list(town = c("order", "feature")) + new_role = list(body_mass = c("order", "feature")) )) -pop$train(list(task)) +train_out1 = pop$train(list(task)) +train_out1$col_roles + +pop$param_set$set_values( + new_role = NULL, + new_role_direct = list(order = character(), group = "island") +) + +train_out2 = pop$train(list(train_out)) +train_out2$col_roles + } \seealso{ https://mlr-org.com/pipeops.html From d3af4751f4acb35d50a3cc83add1093841770629 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 1 Dec 2024 23:52:08 +0100 Subject: [PATCH 05/17] Updated NEWS.mde --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index f6d094e79..cb5f04cc2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,10 @@ # mlr3pipelines 0.7.1-9000 -* New parameter `no_collapse_above_absolute` in `PipeOpCollapseFactors` / `po("collapse_factors")`. +* New parameter `no_collapse_above_absolute` for `PipeOpCollapseFactors` / `po("collapse_factors")`. * Fix: `PipeOpCollapseFactors` now correctly collapses levels of ordered factors. * Fix: `LearnerClassifAvg` and `LearnerRegrAvg` hyperparameters get the `"required"` tag. * New parameter `use_groups` (default `TRUE`) for `PipeOpSubsampling` to respect grouping (changed default behaviour for grouped data) +* New parameter `new_role_direct` for `PipeOpColRoles` / `po("colroles")` to change column roles by role instead of by column. # mlr3pipelines 0.7.1 From e9ed3cc56d842e3fd307f946ca783eb64e412f90 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Mon, 2 Dec 2024 10:47:11 +0100 Subject: [PATCH 06/17] typo in examples --- R/PipeOpColRoles.R | 2 +- man/mlr_pipeops_colroles.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index a0fdf4962..7d1383ab3 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -62,7 +62,7 @@ #' new_role_direct = list(order = character(), group = "island") #' ) #' -#' train_out2 = pop$train(list(train_out)) +#' train_out2 = pop$train(list(train_out1)) #' train_out2$col_roles #' #' @family PipeOps diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index 679a2ca1a..339c52085 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -78,7 +78,7 @@ pop$param_set$set_values( new_role_direct = list(order = character(), group = "island") ) -train_out2 = pop$train(list(train_out)) +train_out2 = pop$train(list(train_out1)) train_out2$col_roles } From 93e4bcfd1374629f0165311db8363c820843c9cc Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Mon, 2 Dec 2024 12:29:39 +0100 Subject: [PATCH 07/17] fix in examples --- R/PipeOpColRoles.R | 2 +- man/mlr_pipeops_colroles.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index 7d1383ab3..457541153 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -54,7 +54,7 @@ #' new_role = list(body_mass = c("order", "feature")) #' )) #' -#' train_out1 = pop$train(list(task)) +#' train_out1 = pop$train(list(task))[[1L]] #' train_out1$col_roles #' #' pop$param_set$set_values( diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index 339c52085..192cc663d 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -70,7 +70,7 @@ pop = po("colroles", param_vals = list( new_role = list(body_mass = c("order", "feature")) )) -train_out1 = pop$train(list(task)) +train_out1 = pop$train(list(task))[[1L]] train_out1$col_roles pop$param_set$set_values( From 517700a597bb38d0eab5205b1ae15bc5aaa8cb65 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Tue, 3 Dec 2024 09:59:15 +0100 Subject: [PATCH 08/17] code style --- R/PipeOpColRoles.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index 457541153..8b1b3b962 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -138,7 +138,7 @@ PipeOpColRoles = R6Class("PipeOpColRoles", # Add new role(s) for column(s) for which we change the role all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) - for(role in all_col_roles) { + for (role in all_col_roles) { task$col_roles[[role]] = union(task$col_roles[[role]], names(which(unlist(map(new_role, .f = function(x) role %in% x))))) } From 28e259c9c054b476cf9a99342022ca94d9b82c50 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Mon, 9 Dec 2024 20:48:36 +0100 Subject: [PATCH 09/17] added tests for param assertions + test for correct behavior with NULL as entry in new_role and new_role_direct --- tests/testthat/test_pipeop_colroles.R | 42 ++++++++++++++++----------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/tests/testthat/test_pipeop_colroles.R b/tests/testthat/test_pipeop_colroles.R index deb604e2a..dac44e2a0 100644 --- a/tests/testthat/test_pipeop_colroles.R +++ b/tests/testthat/test_pipeop_colroles.R @@ -13,21 +13,29 @@ test_that("PipeOpColRoles - basic properties", { }) -test_that("PipeOpColRoles - only correct roles are accepted", { +test_that("PipeOpColRoles - assertions on params work", { expect_error(PipeOpColRoles$new(param_vals = list(new_role = "wrong")), regexp = "list") - expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "wrong", b = NA))), regexp = "character") + expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "wrong", b = NA))), regexp = "character,null") + expect_no_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = NULL)))) expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "wrong"))), regexp = "subset") expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "target"))), regexp = "subset") + expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "group", b = "group"))), regexp = "up to one column per role") + expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "weight", b = "weight"))), regexp = "up to one column per role") + expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = "name", b = "name"))), regexp = "up to one column per role") + expect_error(PipeOpColRoles$new(param_vals = list(new_role = list(a = c("group", "name"), b = c("group", "name")))), regexp = "up to one column per role.*?\"group\", \"name\"") + expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = "wrong")), regexp = "list") - expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(wrong = "x", feature = NA))), regexp = "character") + expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(wrong = "x", feature = NA))), regexp = "character,null") + expect_no_error(PipeOpColRoles$new(param_vals = list(new_role = list(feature = NULL)))) expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(wrong = "x"))), regexp = "subset") expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(target = "y"))), regexp = "subset") - # test that no duplicates for group, name, weight? (error during init) - - # test that roles are correct for task types? (error during training) + expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(group = c("x", "y")))), regexp = "up to one column per role") + expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(weight = c("x", "y")))), regexp = "up to one column per role") + expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(name = c("x", "y")))), regexp = "up to one column per role") + expect_error(PipeOpColRoles$new(param_vals = list(new_role_direct = list(group = c("x", "y"), name = c("x", "y")))), regexp = "up to one column per role.*?\"group\", \"name\"") }) @@ -61,22 +69,23 @@ test_that("PipeOpColRoles - new_role works", { task = mlr_tasks$get("iris") task$cbind(data.table(rn = sprintf("%03d", 1:150))) - op = PipeOpColRoles$new(param_vals = list(new_role = list(rn = "name", Petal.Length = "order", Petal.Width = character(0)))) + op = PipeOpColRoles$new(param_vals = list(new_role = list(rn = "name", Petal.Length = "order", Petal.Width = character(0), Sepal.Width = NULL))) train_out = train_pipeop(op, inputs = list(task))[[1L]] col_roles_actual = train_out$col_roles col_roles_expected = list( - feature = c("Sepal.Length", "Sepal.Width"), target = "Species", name = "rn", - order = "Petal.Length", stratum = character(0), group = character(0), weight = character(0) + feature = "Sepal.Length", target = "Species", name = "rn", order = "Petal.Length", + stratum = character(0), group = character(0), weight = character(0) ) - # this does nothing? + # Compatibility with upcoming new weights_learner role in mlr3 if ("weights_learner" %in% names(task)) names(col_roles_expected)[names(col_roles_expected) == "weight"] = "weights_learner" expect_equal(train_out$col_roles[names(col_roles_expected)], col_roles_expected) expect_equal(train_out$row_names$row_name, task$data(cols = "rn")[[1L]]) expect_true("Petal.Width" %nin% colnames(train_out$data())) + expect_true("Sepal.Width" %nin% colnames(train_out$data())) predict_out = predict_pipeop(op, inputs = list(task))[[1L]] expect_equal(train_out, predict_out) @@ -86,20 +95,21 @@ test_that("PipeOpColRoles - new_role_direct works", { task = mlr_tasks$get("iris") task$cbind(data.table(rn = sprintf("%03d", 1:150))) + task$col_roles$group = "Species" op = PipeOpColRoles$new(param_vals = list(new_role_direct = list( - name = "rn", order = "Petal.Length", feature = character(0)))) + name = "rn", order = "Petal.Length", feature = character(0), group = NULL))) train_out = train_pipeop(op, inputs = list(task))[[1L]] col_roles_actual = train_out$col_roles col_roles_expected = list( - feature = character(0), target = "Species", name = "rn", - order = "Petal.Length", stratum = character(0), group = character(0), weight = character(0) + feature = character(0), target = "Species", name = "rn", order = "Petal.Length", + stratum = character(0), group = character(0), weight = character(0) ) - # ask whether necessary - # if ("weights_learner" %in% names(task)) names(col_roles_expected)[names(col_roles_expected) == "weight"] = "weights_learner" + # Compatibility with upcoming new weights_learner role in mlr3 + if ("weights_learner" %in% names(task)) names(col_roles_expected)[names(col_roles_expected) == "weight"] = "weights_learner" expect_equal(train_out$col_roles[names(col_roles_expected)], col_roles_expected) expect_equal(train_out$row_names$row_name, task$data(cols = "rn")[[1L]]) @@ -108,5 +118,3 @@ test_that("PipeOpColRoles - new_role_direct works", { predict_out = predict_pipeop(op, inputs = list(task))[[1L]] expect_equal(train_out, predict_out) }) - -# if we keep behavior with NULL, add tests From 7b114f46825296545eb3b2ad7d2b378160d17e1c Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Mon, 9 Dec 2024 21:41:51 +0100 Subject: [PATCH 10/17] added assertions for params, improved control flow in transform --- R/PipeOpColRoles.R | 77 ++++++++++++++++++++++--------------- man/mlr_pipeops_colroles.Rd | 2 +- 2 files changed, 48 insertions(+), 31 deletions(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index 8b1b3b962..341a25d5e 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -34,7 +34,7 @@ #' Named list of new column roles by column. The names must match the column names of the input task that #' will later be trained/predicted on. Each entry of the list must contain a character vector with #' possible values of [`mlr_reflections$task_col_roles`][mlr3::mlr_reflections]. -#' If the value is given as `character()`, the column will be dropped from the input task. Changing the role +#' If the value is given as `character()` or `NULL`, the column will be dropped from the input task. Changing the role #' of a column results in this column loosing its previous role(s). #' * `new_role_direct` :: named `list`\cr# #' Named list of new column roles by role. The names must match the possible column roles, i.e. values of @@ -78,11 +78,22 @@ PipeOpColRoles = R6Class("PipeOpColRoles", new_role = p_uty( tags = c("train", "predict"), custom_check = crate(function(x) { - first_check = check_list(x, types = "character", any.missing = FALSE, min.len = 1L, names = "named") + first_check = check_list(x, types = c("character", "null"), min.len = 1L, names = "unique") + # test that this works if two vectors have the name of a column, otherwise "unique"; is unique in task$col_roles # Return the error directly if this failed - if (is.character(first_check)) return(first_check) + if (!isTRUE(first_check)) return(first_check) + + # Only one column for roles "group", "weight", and "name" + counter = c(group = 0L, weight = 0L, name = 0L) + for (i in seq_along(x)) { + counter = counter + c("group", "weight", "name") %in% x[[i]] + } + if (any(counter > 1L)) { + stopf("There may only be up to one column per role for role(s) %s.", str_collapse(names(which(counter > 1L)), quote = '"')) + } + # Changing anything target related is not supported. - # A value of "character()" is accepted. + # A value of character() or NULL is accepted. all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) check_subset(unlist(x), all_col_roles[all_col_roles != "target"]) }, .parent = topenv()) @@ -91,11 +102,18 @@ PipeOpColRoles = R6Class("PipeOpColRoles", new_role_direct = p_uty( tags = c("train", "predict"), custom_check = crate(function(x) { - first_check = check_list(x, types = "character", any.missing = FALSE, min.len = 1L, unique = TRUE, names = "named", null.ok = TRUE) + first_check = check_list(x, types = c("character", "null"), min.len = 1L, names = "unique") # Return the error directly if this failed - if (is.character(first_check)) return(first_check) + if (!isTRUE(first_check)) return(first_check) + + # Only one column for roles "group", "weight", and "name" + lens = lengths(x[c("group", "weight", "name")]) + if (any(lens > 1L)) { + stopf("There may only be up to one column per role for role(s) %s.", str_collapse(names(which(lens > 1L)), quote = '"')) + } + # Changing anything target related is not supported. - # A value of "character()" is accepted. + # A value of character() or NULL is accepted. all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) check_subset(names(x), all_col_roles[all_col_roles != "target"]) }, .parent = topenv()) @@ -116,36 +134,35 @@ PipeOpColRoles = R6Class("PipeOpColRoles", stop("Both parameters, 'new_role' and 'new_role_direct', are set. Provide only one parameter at a time.") } - # Get the columns for which we change roles - # Replace NULLs with character(0) + # Create list new_roles with similar structure to col_roles (names are column roles, entries are column names) if (!is.null(new_role)) { - new_role_cols = names(new_role) - } else if (!is.null(new_role_direct)) { - new_role_cols = unique(unlist(new_role_direct)) - # Replace NULL with empty string, to allow option with NULL (might throw this out) - # would make sense to add NULL to new_role as well if we keep it - new_role_direct <- lapply(new_role_direct, function(x) if (is.null(x)) character(0) else x) + # Set new_roles to task$col_roles with columns removed for which we change roles (as we want a column to only + # have the roles given for that column in new_role) + new_roles = map(task$col_roles, .f = function(x) x[x %nin% names(new_role)]) + + # Add new role(s) for column(s) for which we change the role + possible_col_roles = mlr3::mlr_reflections$task_col_roles[[task$task_type]] + for (role in possible_col_roles) { + new_roles[[role]] = union( + new_roles[[role]], + names(which(unlist(map(new_role, .f = function(x) role %in% x)))) + ) + } + } else { + new_roles = new_role_direct } + # Replace NULLs with character(0) + new_roles = lapply(new_roles, as.character) + # Changing the role of a target is not supported - if (any(task$col_roles$target %in% new_role_cols)) { + cols = unique(unlist(new_roles[names(new_roles) != "target"])) + if (any(task$col_roles$target %in% cols)) { stop("Cannot change the role of a target.") } - if (!is.null(new_role)) { - # Drop (all) column(s) for which we change the role - task$col_roles = map(task$col_roles, .f = function(x) x[x %nin% new_role_cols]) - - # Add new role(s) for column(s) for which we change the role - all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) - for (role in all_col_roles) { - task$col_roles[[role]] = union(task$col_roles[[role]], - names(which(unlist(map(new_role, .f = function(x) role %in% x))))) - } - } else if (!is.null(new_role_direct)) { - # Update roles to match new_role_direct - task$col_roles[names(new_role_direct)] = lapply(names(new_role_direct), function(role) new_role_direct[[role]]) - } + # Update column roles + task$col_roles[names(new_roles)] = lapply(names(new_roles), function(role) new_roles[[role]]) task } diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index 192cc663d..db7ade79d 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -46,7 +46,7 @@ The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}} Named list of new column roles by column. The names must match the column names of the input task that will later be trained/predicted on. Each entry of the list must contain a character vector with possible values of \code{\link[mlr3:mlr_reflections]{mlr_reflections$task_col_roles}}. -If the value is given as \code{character()}, the column will be dropped from the input task. Changing the role +If the value is given as \code{character()} or \code{NULL}, the column will be dropped from the input task. Changing the role of a column results in this column loosing its previous role(s). \item \code{new_role_direct} :: named \code{list}\cr# Named list of new column roles by role. The names must match the possible column roles, i.e. values of From 6c69e34dac3edd0e7f75eef39eb878e48f165a38 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Mon, 9 Dec 2024 21:42:03 +0100 Subject: [PATCH 11/17] slight change in tests --- tests/testthat/test_pipeop_colroles.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_pipeop_colroles.R b/tests/testthat/test_pipeop_colroles.R index dac44e2a0..42089bc44 100644 --- a/tests/testthat/test_pipeop_colroles.R +++ b/tests/testthat/test_pipeop_colroles.R @@ -69,13 +69,15 @@ test_that("PipeOpColRoles - new_role works", { task = mlr_tasks$get("iris") task$cbind(data.table(rn = sprintf("%03d", 1:150))) - op = PipeOpColRoles$new(param_vals = list(new_role = list(rn = "name", Petal.Length = "order", Petal.Width = character(0), Sepal.Width = NULL))) + op = PipeOpColRoles$new(param_vals = list(new_role = list( + rn = "name", Petal.Length = c("feature", "order"), Petal.Width = character(0), Sepal.Width = NULL)) + ) train_out = train_pipeop(op, inputs = list(task))[[1L]] col_roles_actual = train_out$col_roles col_roles_expected = list( - feature = "Sepal.Length", target = "Species", name = "rn", order = "Petal.Length", + feature = c("Sepal.Length", "Petal.Length"), target = "Species", name = "rn", order = "Petal.Length", stratum = character(0), group = character(0), weight = character(0) ) @@ -98,13 +100,14 @@ test_that("PipeOpColRoles - new_role_direct works", { task$col_roles$group = "Species" op = PipeOpColRoles$new(param_vals = list(new_role_direct = list( - name = "rn", order = "Petal.Length", feature = character(0), group = NULL))) + name = "rn", order = c("Petal.Length", "Sepal.Length"), feature = character(0), group = NULL)) + ) train_out = train_pipeop(op, inputs = list(task))[[1L]] col_roles_actual = train_out$col_roles col_roles_expected = list( - feature = character(0), target = "Species", name = "rn", order = "Petal.Length", + feature = character(0), target = "Species", name = "rn", order = c("Petal.Length", "Sepal.Length"), stratum = character(0), group = character(0), weight = character(0) ) @@ -118,3 +121,4 @@ test_that("PipeOpColRoles - new_role_direct works", { predict_out = predict_pipeop(op, inputs = list(task))[[1L]] expect_equal(train_out, predict_out) }) + From 4610128e3cf169b57652a54d47e85e4bdd6fa938 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Wed, 18 Dec 2024 11:04:59 +0100 Subject: [PATCH 12/17] remove dev comment --- R/PipeOpColRoles.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index 341a25d5e..c48c12710 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -79,7 +79,6 @@ PipeOpColRoles = R6Class("PipeOpColRoles", tags = c("train", "predict"), custom_check = crate(function(x) { first_check = check_list(x, types = c("character", "null"), min.len = 1L, names = "unique") - # test that this works if two vectors have the name of a column, otherwise "unique"; is unique in task$col_roles # Return the error directly if this failed if (!isTRUE(first_check)) return(first_check) From 9fa2e43aa12b6ad41f0e44b11a339be45aeb7bad Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 18 Dec 2024 11:25:22 +0100 Subject: [PATCH 13/17] Update R/PipeOpColRoles.R --- R/PipeOpColRoles.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index c48c12710..f5af86428 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -85,7 +85,7 @@ PipeOpColRoles = R6Class("PipeOpColRoles", # Only one column for roles "group", "weight", and "name" counter = c(group = 0L, weight = 0L, name = 0L) for (i in seq_along(x)) { - counter = counter + c("group", "weight", "name") %in% x[[i]] + counter = counter + names(counter) %in% x[[i]] } if (any(counter > 1L)) { stopf("There may only be up to one column per role for role(s) %s.", str_collapse(names(which(counter > 1L)), quote = '"')) From 246ef3ca83a9887110eaa39353c5b77325afbf04 Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 18 Dec 2024 11:29:17 +0100 Subject: [PATCH 14/17] custom_check return instead of stop --- R/PipeOpColRoles.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index f5af86428..29bfcf609 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -88,7 +88,7 @@ PipeOpColRoles = R6Class("PipeOpColRoles", counter = counter + names(counter) %in% x[[i]] } if (any(counter > 1L)) { - stopf("There may only be up to one column per role for role(s) %s.", str_collapse(names(which(counter > 1L)), quote = '"')) + return(sprintf("There may only be up to one column per role for role(s) %s.", str_collapse(names(which(counter > 1L)), quote = '"'))) } # Changing anything target related is not supported. @@ -108,7 +108,7 @@ PipeOpColRoles = R6Class("PipeOpColRoles", # Only one column for roles "group", "weight", and "name" lens = lengths(x[c("group", "weight", "name")]) if (any(lens > 1L)) { - stopf("There may only be up to one column per role for role(s) %s.", str_collapse(names(which(lens > 1L)), quote = '"')) + return(sprintf("There may only be up to one column per role for role(s) %s.", str_collapse(names(which(lens > 1L)), quote = '"'))) } # Changing anything target related is not supported. From 9dea1ae1a00cc6d9678a5432cef17f8af1c76586 Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 18 Dec 2024 11:42:41 +0100 Subject: [PATCH 15/17] Apply suggestions from code review --- R/PipeOpColRoles.R | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index 29bfcf609..041262a72 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -133,35 +133,33 @@ PipeOpColRoles = R6Class("PipeOpColRoles", stop("Both parameters, 'new_role' and 'new_role_direct', are set. Provide only one parameter at a time.") } - # Create list new_roles with similar structure to col_roles (names are column roles, entries are column names) + # Create list new_roles_direct with similar structure to col_roles (names are column roles, entries are column names) if (!is.null(new_role)) { - # Set new_roles to task$col_roles with columns removed for which we change roles (as we want a column to only + # Set new_roles_direct to task$col_roles with columns removed for which we change roles (as we want a column to only # have the roles given for that column in new_role) - new_roles = map(task$col_roles, .f = function(x) x[x %nin% names(new_role)]) + new_roles_direct = map(task$col_roles, .f = function(x) x[x %nin% names(new_role)]) # Add new role(s) for column(s) for which we change the role possible_col_roles = mlr3::mlr_reflections$task_col_roles[[task$task_type]] for (role in possible_col_roles) { - new_roles[[role]] = union( - new_roles[[role]], + new_roles_direct[[role]] = union( + new_roles_direct[[role]], names(which(unlist(map(new_role, .f = function(x) role %in% x)))) ) } - } else { - new_roles = new_role_direct } # Replace NULLs with character(0) - new_roles = lapply(new_roles, as.character) + new_roles_direct = lapply(new_roles_direct, as.character) # Changing the role of a target is not supported - cols = unique(unlist(new_roles[names(new_roles) != "target"])) + cols = unlist(new_roles[names(new_roles_direct) != "target"]) if (any(task$col_roles$target %in% cols)) { stop("Cannot change the role of a target.") } # Update column roles - task$col_roles[names(new_roles)] = lapply(names(new_roles), function(role) new_roles[[role]]) + task$col_roles[names(new_roles_direct)] = new_roles_direct task } From 8f6091cd3c78ac556eb7804e3e18c11a04cebe6e Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 18 Dec 2024 11:44:42 +0100 Subject: [PATCH 16/17] Apply suggestions from code review --- R/PipeOpColRoles.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index 041262a72..76bdeac6c 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -133,33 +133,33 @@ PipeOpColRoles = R6Class("PipeOpColRoles", stop("Both parameters, 'new_role' and 'new_role_direct', are set. Provide only one parameter at a time.") } - # Create list new_roles_direct with similar structure to col_roles (names are column roles, entries are column names) + # Create list new_role_direct with similar structure to col_roles (names are column roles, entries are column names) if (!is.null(new_role)) { - # Set new_roles_direct to task$col_roles with columns removed for which we change roles (as we want a column to only + # Set new_role_direct to task$col_roles with columns removed for which we change roles (as we want a column to only # have the roles given for that column in new_role) - new_roles_direct = map(task$col_roles, .f = function(x) x[x %nin% names(new_role)]) + new_role_direct = map(task$col_roles, .f = function(x) x[x %nin% names(new_role)]) # Add new role(s) for column(s) for which we change the role possible_col_roles = mlr3::mlr_reflections$task_col_roles[[task$task_type]] for (role in possible_col_roles) { - new_roles_direct[[role]] = union( - new_roles_direct[[role]], + new_role_direct[[role]] = union( + new_role_direct[[role]], names(which(unlist(map(new_role, .f = function(x) role %in% x)))) ) } } # Replace NULLs with character(0) - new_roles_direct = lapply(new_roles_direct, as.character) + new_role_direct = lapply(new_role_direct, as.character) # Changing the role of a target is not supported - cols = unlist(new_roles[names(new_roles_direct) != "target"]) + cols = unlist(new_roles[names(new_role_direct) != "target"]) if (any(task$col_roles$target %in% cols)) { stop("Cannot change the role of a target.") } # Update column roles - task$col_roles[names(new_roles_direct)] = new_roles_direct + task$col_roles[names(new_role_direct)] = new_role_direct task } From ba39a664280186c650261aefaaeef121c673bd60 Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 18 Dec 2024 11:46:17 +0100 Subject: [PATCH 17/17] Apply suggestions from code review --- R/PipeOpColRoles.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index 76bdeac6c..f5dd9d006 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -153,7 +153,7 @@ PipeOpColRoles = R6Class("PipeOpColRoles", new_role_direct = lapply(new_role_direct, as.character) # Changing the role of a target is not supported - cols = unlist(new_roles[names(new_role_direct) != "target"]) + cols = unlist(new_role_direct[names(new_role_direct) != "target"]) if (any(task$col_roles$target %in% cols)) { stop("Cannot change the role of a target.") }