From 36678642ac21bc27f3ff761b84b3c0eb40df4768 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 14 Jul 2024 16:50:24 +0200 Subject: [PATCH 01/15] first draft, not cleaned up --- R/PipeOpRowApply.R | 154 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 R/PipeOpRowApply.R diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R new file mode 100644 index 000000000..cda597def --- /dev/null +++ b/R/PipeOpRowApply.R @@ -0,0 +1,154 @@ +#' @title Apply a Function to each Column of a Task +#' +#' @usage NULL +#' @name mlr_pipeops_colapply +#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @description +#' Applies a function to each column of a task. Use the `affect_columns` parameter inherited from +#' [`PipeOpTaskPreprocSimple`] to limit the columns this function should be applied to. This can be used +#' for simple parameter transformations or type conversions (e.g. `as.numeric`). +#' +#' The same function is applied during training and prediction. One important relationship for +#' machine learning preprocessing is that during the prediction phase, the preprocessing on each +#' data row should be independent of other rows. Therefore, the `applicator` function should always +#' return a vector / list where each result component only depends on the corresponding input component and +#' not on other components. As a rule of thumb, if the function `f` generates output different +#' from `Vectorize(f)`, it is not a function that should be used for `applicator`. +#' +#' @section Construction: +#' ``` +#' PipeOpColApply$new(id = "colapply", param_vals = list()) +#' ``` +#' * `id` :: `character(1)`\cr +#' Identifier of resulting object, default `"colapply"`. +#' * `param_vals` :: named `list`\cr +#' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. +#' +#' @section Input and Output Channels: +#' Input and output channels are inherited from [`PipeOpTaskPreprocSimple`]. +#' +#' The output is the input [`Task`][mlr3::Task] with features changed according to the `applicator` parameter. +#' +#' @section State: +#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreprocSimple`]. +#' +#' @section Parameters: +#' The parameters are the parameters inherited from [`PipeOpTaskPreprocSimple`], as well as: +#' * `applicator` :: `function`\cr +#' Function to apply to each column of the task. +#' The return value should be a `vector` of the same length as the input, i.e., the function vectorizes over the input. +#' A typical example would be `as.numeric`.\cr +#' The return value can also be a `matrix`, `data.frame`, or [`data.table`][data.table::data.table]. +#' In this case, the length of the input must match the number of returned rows. +#' The names of the resulting features of the output [`Task`][mlr3::Task] is based on the (column) name(s) of the return value of the applicator function, +#' prefixed with the original feature name separated by a dot (`.`). +#' Use [`Vectorize`][base::Vectorize] to create a vectorizing function from any function that ordinarily only takes one element input.\cr +#' +#' @section Internals: +#' Calls [`map`][mlr3misc::map] on the data, using the value of `applicator` as `f.` and coerces the output via [`as.data.table`]. +#' +#' @section Fields: +#' Only fields inherited from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @section Methods: +#' Only methods inherited from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @family PipeOps +#' @template seealso_pipeopslist +#' @include PipeOpTaskPreproc.R +#' @export +#' @examples +#' library("mlr3") +#' +#' task = tsk("iris") +#' poca = po("colapply", applicator = as.character) +#' poca$train(list(task))[[1]] # types are converted +#' +#' # function that does not vectorize +#' f1 = function(x) { +#' # we could use `ifelse` here, but that is not the point +#' if (x > 1) { +#' "a" +#' } else { +#' "b" +#' } +#' } +#' poca$param_set$values$applicator = Vectorize(f1) +#' poca$train(list(task))[[1]]$data() +#' +#' # only affect Petal.* columns +#' poca$param_set$values$affect_columns = selector_grep("^Petal") +#' poca$train(list(task))[[1]]$data() +#' +#' # function returning multiple columns +#' f2 = function(x) { +#' cbind(floor = floor(x), ceiling = ceiling(x)) +#' } +#' poca$param_set$values$applicator = f2 +#' poca$param_set$values$affect_columns = selector_all() +#' poca$train(list(task))[[1]]$data() +PipeOpRowApply = R6Class("PipeOpRowApply", + inherit = PipeOpTaskPreprocSimple, + public = list( + initialize = function(id = "rowapply", param_vals = list()) { + ps = ps( + applicator = p_uty(custom_check = check_function, tags = c("train", "predict")), + col_prefix = p_uty(custom_check = check_string) # tags necessary? + ) + ps$values = list( + applicator = identity, + col_prefix = "" + ) + super$initialize(id, ps, param_vals = param_vals) + if ("affect_columns" %nin% names(param_vals)) { + # can't put this in `ps$values` because it is a PipeOpTaskPreproc param + self$param_set$values$affect_columns = selector_type(c("numeric", "integer")) + # copied from PipeOpMissingIndicators + # decided for this (against param select_cols), since this seemed like the more natural solution + } + } + ), + private = list( + + .get_state_dt = function(dt, levels, target) { + list() + }, + + .transform_dt = function(dt, levels) { + applicator = self$param_set$values$applicator + col_prefix = self$param_set$values$col_prefix + cnames = colnames(dt) # better way, in ref. to tasK? this way: correct order guaranteed + + dt = t(apply(dt, 1, applicator)) + if (!(col_prefix == "")) { # could do nzchar, but we know it is of lenght 1 + cnames = paste(col_prefix, cnames, sep = ".") # is there a usual way this is done in mlr? + } + colnames(dt) = cnames # is setnames() better? + dt # currently doesnt keep col order, undesired? + } + ) +) + +mlr_pipeops$add("rowapply", PipeOpRowApply) + +#https://github.com/mlr-org/mlr3pipelines/issues/318 + +# Fragen: +# - What should this PO do exactly? Doesn't it change the column layout, so it couldn't be a TaskPreprocSimple? +# - how do we change the data in the task correctly? -> Examples +# - How do we initliaze the select_cols correctly? isnt it same as affect_col? -> Examples + +applicator = function(x) scale(x, scale = FALSE) +applicator = function(x) mean(x, na.rm = TRUE) +# how would row-wise mean work? add a column? would change layout of task +pora <- po("rowapply", applicator = applicator) +task <- tsk("titanic") +pora$train(list(task))[[1]]$head() +task$head() + +dt = task$data(cols = task$feature_names[c(1,4)], rows = 1:5) +t(apply(dt, 1, applicator)) + +# General question: When to use feature_type (implying .select_col), when to use affect_column? + From 96a756eb289292da89223c5bc2af6b476ac362f1 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 14 Jul 2024 23:48:32 +0200 Subject: [PATCH 02/15] cleanup + initial tests --- R/PipeOpRowApply.R | 92 ++++++--------------------- tests/testthat/test_pipeop_rowapply.R | 17 +++++ 2 files changed, 36 insertions(+), 73 deletions(-) create mode 100644 tests/testthat/test_pipeop_rowapply.R diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index cda597def..1b0228280 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -1,34 +1,28 @@ -#' @title Apply a Function to each Column of a Task +#' @title Apply a Function to each Row of a Task #' #' @usage NULL -#' @name mlr_pipeops_colapply +#' @name mlr_pipeops_rowapply #' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. #' #' @description -#' Applies a function to each column of a task. Use the `affect_columns` parameter inherited from +#' Applies a function to each row of a task. Use the `affect_columns` parameter inherited from #' [`PipeOpTaskPreprocSimple`] to limit the columns this function should be applied to. This can be used -#' for simple parameter transformations or type conversions (e.g. `as.numeric`). -#' -#' The same function is applied during training and prediction. One important relationship for -#' machine learning preprocessing is that during the prediction phase, the preprocessing on each -#' data row should be independent of other rows. Therefore, the `applicator` function should always -#' return a vector / list where each result component only depends on the corresponding input component and -#' not on other components. As a rule of thumb, if the function `f` generates output different -#' from `Vectorize(f)`, it is not a function that should be used for `applicator`. +#' for row-wise normalization. +#' The same function is applied during training and prediction. #' #' @section Construction: #' ``` -#' PipeOpColApply$new(id = "colapply", param_vals = list()) +#' PipeOpColApply$new(id = "rowapply", param_vals = list()) #' ``` #' * `id` :: `character(1)`\cr -#' Identifier of resulting object, default `"colapply"`. +#' Identifier of resulting object, default `"rowapply"`. #' * `param_vals` :: named `list`\cr #' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. #' #' @section Input and Output Channels: #' Input and output channels are inherited from [`PipeOpTaskPreprocSimple`]. #' -#' The output is the input [`Task`][mlr3::Task] with features changed according to the `applicator` parameter. +#' The output is the input [`Task`][mlr3::Task] with rows changed according to the `applicator` parameter. #' #' @section State: #' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreprocSimple`]. @@ -36,9 +30,9 @@ #' @section Parameters: #' The parameters are the parameters inherited from [`PipeOpTaskPreprocSimple`], as well as: #' * `applicator` :: `function`\cr -#' Function to apply to each column of the task. +#' Function to apply to each row of the task. #' The return value should be a `vector` of the same length as the input, i.e., the function vectorizes over the input. -#' A typical example would be `as.numeric`.\cr +#' A typical example would be `scale`.\cr #' The return value can also be a `matrix`, `data.frame`, or [`data.table`][data.table::data.table]. #' In this case, the length of the input must match the number of returned rows. #' The names of the resulting features of the output [`Task`][mlr3::Task] is based on the (column) name(s) of the return value of the applicator function, @@ -46,7 +40,7 @@ #' Use [`Vectorize`][base::Vectorize] to create a vectorizing function from any function that ordinarily only takes one element input.\cr #' #' @section Internals: -#' Calls [`map`][mlr3misc::map] on the data, using the value of `applicator` as `f.` and coerces the output via [`as.data.table`]. +#' Calls [`apply`] on the data, using the value of `applicator` as `FUN` and coerces the output via [`as.data.table`]. #' #' @section Fields: #' Only fields inherited from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. @@ -62,39 +56,15 @@ #' library("mlr3") #' #' task = tsk("iris") -#' poca = po("colapply", applicator = as.character) -#' poca$train(list(task))[[1]] # types are converted -#' -#' # function that does not vectorize -#' f1 = function(x) { -#' # we could use `ifelse` here, but that is not the point -#' if (x > 1) { -#' "a" -#' } else { -#' "b" -#' } -#' } -#' poca$param_set$values$applicator = Vectorize(f1) -#' poca$train(list(task))[[1]]$data() -#' -#' # only affect Petal.* columns -#' poca$param_set$values$affect_columns = selector_grep("^Petal") -#' poca$train(list(task))[[1]]$data() -#' -#' # function returning multiple columns -#' f2 = function(x) { -#' cbind(floor = floor(x), ceiling = ceiling(x)) -#' } -#' poca$param_set$values$applicator = f2 -#' poca$param_set$values$affect_columns = selector_all() -#' poca$train(list(task))[[1]]$data() +#' poca = po("rowapply", applicator = scale) +#' poca$train(list(task))[[1]] # rows are standardized PipeOpRowApply = R6Class("PipeOpRowApply", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "rowapply", param_vals = list()) { ps = ps( applicator = p_uty(custom_check = check_function, tags = c("train", "predict")), - col_prefix = p_uty(custom_check = check_string) # tags necessary? + col_prefix = p_uty(custom_check = check_string, tags = c("train", "predict")) ) ps$values = list( applicator = identity, @@ -104,8 +74,6 @@ PipeOpRowApply = R6Class("PipeOpRowApply", if ("affect_columns" %nin% names(param_vals)) { # can't put this in `ps$values` because it is a PipeOpTaskPreproc param self$param_set$values$affect_columns = selector_type(c("numeric", "integer")) - # copied from PipeOpMissingIndicators - # decided for this (against param select_cols), since this seemed like the more natural solution } } ), @@ -118,37 +86,15 @@ PipeOpRowApply = R6Class("PipeOpRowApply", .transform_dt = function(dt, levels) { applicator = self$param_set$values$applicator col_prefix = self$param_set$values$col_prefix - cnames = colnames(dt) # better way, in ref. to tasK? this way: correct order guaranteed - + cnames = colnames(dt) dt = t(apply(dt, 1, applicator)) - if (!(col_prefix == "")) { # could do nzchar, but we know it is of lenght 1 - cnames = paste(col_prefix, cnames, sep = ".") # is there a usual way this is done in mlr? + if (!(col_prefix == "")) { + cnames = paste(col_prefix, cnames, sep = ".") } - colnames(dt) = cnames # is setnames() better? - dt # currently doesnt keep col order, undesired? + colnames(dt) = cnames + dt } ) ) mlr_pipeops$add("rowapply", PipeOpRowApply) - -#https://github.com/mlr-org/mlr3pipelines/issues/318 - -# Fragen: -# - What should this PO do exactly? Doesn't it change the column layout, so it couldn't be a TaskPreprocSimple? -# - how do we change the data in the task correctly? -> Examples -# - How do we initliaze the select_cols correctly? isnt it same as affect_col? -> Examples - -applicator = function(x) scale(x, scale = FALSE) -applicator = function(x) mean(x, na.rm = TRUE) -# how would row-wise mean work? add a column? would change layout of task -pora <- po("rowapply", applicator = applicator) -task <- tsk("titanic") -pora$train(list(task))[[1]]$head() -task$head() - -dt = task$data(cols = task$feature_names[c(1,4)], rows = 1:5) -t(apply(dt, 1, applicator)) - -# General question: When to use feature_type (implying .select_col), when to use affect_column? - diff --git a/tests/testthat/test_pipeop_rowapply.R b/tests/testthat/test_pipeop_rowapply.R new file mode 100644 index 000000000..783706fef --- /dev/null +++ b/tests/testthat/test_pipeop_rowapply.R @@ -0,0 +1,17 @@ +context("PipeOpRowApply") + +test_that("apply general tests", { + + op = PipeOpRowApply$new() + expect_pipeop(op) + + task = mlr_tasks$get("iris") + expect_datapreproc_pipeop_class(PipeOpRowApply, task = task, + constargs = list(param_vals = list(applicator = as.integer))) + + expect_datapreproc_pipeop_class(PipeOpRolApply, task = mlr_tasks$get("pima"), + constargs = list(param_vals = list(applicator = as.numeric))) + +}) + + From c941b96c7790b545769e96b6b1f4f12a818db65d Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Mon, 15 Jul 2024 19:42:36 +0200 Subject: [PATCH 03/15] copied other tests --- tests/testthat/test_pipeop_rowapply.R | 102 ++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/tests/testthat/test_pipeop_rowapply.R b/tests/testthat/test_pipeop_rowapply.R index 783706fef..d16473a8e 100644 --- a/tests/testthat/test_pipeop_rowapply.R +++ b/tests/testthat/test_pipeop_rowapply.R @@ -15,3 +15,105 @@ test_that("apply general tests", { }) +test_that("apply results look as they should", { + + po = PipeOpRowApply$new() + task = mlr_tasks$get("iris") + + po$param_set$values = list(applicator = as.character) + + expect_equal( + po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + expect_equal( + po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + po$param_set$values = list(applicator = function(x) x^2) + + expect_equal( + po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + expect_equal( + po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + expect_equal( + po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + expect_equal( + po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + tomean = function(x) rep(mean(x), length(x)) + + po$param_set$values = list(applicator = tomean) + + expect_equal( + po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + expect_equal( + po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + expect_equal( + po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + expect_equal( + po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + po$param_set$values = list(applicator = as.character, affect_columns = selector_grep("^Sepal")) + + expect_equal( + po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + expect_equal( + po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + po$param_set$values = list(applicator = Vectorize(as.character), affect_columns = selector_grep("^Sepal")) + + expect_equal( + po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) + + expect_equal( + po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + + ) +}) + +test_that("empty task", { + + task = tsk("iris")$filter(0L) + po = PipeOpRowApply$new() + po$param_set$values$applicator = function(x) as.integer(x) + + train_out = po$train(list(task))[[1L]] + expect_data_table(train_out$data(), nrows = 0L) + expect_true(all(train_out$feature_types$type == "integer")) + + predict_out = po$predict(list(task))[[1L]] + expect_data_table(predict_out$data(), nrows = 0L) + expect_true(all(predict_out$feature_types$type == "integer")) +}) From ae41af26ed8f11f5d8cc915784c96f339d5ce35a Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 21 Jul 2024 23:31:50 +0200 Subject: [PATCH 04/15] WIP PO behav + tests --- R/PipeOpRowApply.R | 45 +++++++++++++++++++++------ tests/testthat/test_pipeop_rowapply.R | 30 ++++++++++++++---- 2 files changed, 59 insertions(+), 16 deletions(-) diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index 1b0228280..61181a6b0 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -70,11 +70,7 @@ PipeOpRowApply = R6Class("PipeOpRowApply", applicator = identity, col_prefix = "" ) - super$initialize(id, ps, param_vals = param_vals) - if ("affect_columns" %nin% names(param_vals)) { - # can't put this in `ps$values` because it is a PipeOpTaskPreproc param - self$param_set$values$affect_columns = selector_type(c("numeric", "integer")) - } + super$initialize(id, ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } ), private = list( @@ -85,14 +81,43 @@ PipeOpRowApply = R6Class("PipeOpRowApply", .transform_dt = function(dt, levels) { applicator = self$param_set$values$applicator + # FIXME: if user replaces this to be NULL, throws error later (defend against?) col_prefix = self$param_set$values$col_prefix cnames = colnames(dt) - dt = t(apply(dt, 1, applicator)) - if (!(col_prefix == "")) { - cnames = paste(col_prefix, cnames, sep = ".") + nc = ncol(dt) + nr = nrow(dt) + + res = apply(dt, 1, applicator) + + + # either change is.atomic check + # or do explicit check of output -> look for precedents in code + + if (is.list(res)) stop("Applicator generates a list. Applicator should generate either atomic vector or matrix.") + + # convert res into matrix to allow identical handling of column name(s) + if (test_atomic_vector(res)) { + res = matrix(res, nrow = 1) # nrow for faciliation of t() later + } + + if (is.matrix(res)) { + # matrix needs to be transposed for correct dimensions + res = t(res) + # for unnamed matrix use either original column names or generate names + if (is.null(colnames(res))) { + if (ncol(res) == nc) { + colnames(res) = cnames + } else { + colnames(res) = paste0("V", seq_along(ncol(res))) + } + } + } + + # add col_prefix + if (col_prefix != "") { + colnames(res) <- paste(col_prefix, colnames(res), sep = ".") } - colnames(dt) = cnames - dt + res } ) ) diff --git a/tests/testthat/test_pipeop_rowapply.R b/tests/testthat/test_pipeop_rowapply.R index d16473a8e..3b6f5054a 100644 --- a/tests/testthat/test_pipeop_rowapply.R +++ b/tests/testthat/test_pipeop_rowapply.R @@ -1,18 +1,36 @@ context("PipeOpRowApply") -test_that("apply general tests", { +test_that("PipeOpRowApply - basic properties", { op = PipeOpRowApply$new() + task = mlr_tasks$get("iris") expect_pipeop(op) + expect_datapreproc_pipeop_class(PipeOpRowApply, task = task) + +}) + +test_that("PipeOpRowApply - transform works as intended") + + # general test + op = PipeOpRowApply$new() task = mlr_tasks$get("iris") - expect_datapreproc_pipeop_class(PipeOpRowApply, task = task, - constargs = list(param_vals = list(applicator = as.integer))) - expect_datapreproc_pipeop_class(PipeOpRolApply, task = mlr_tasks$get("pima"), - constargs = list(param_vals = list(applicator = as.numeric))) + op$param_set$values$applicator = sum -}) + expect_equal( + op$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + as.data.table(apply(iris[1:4], 1, sum)) + ) + +# general tests +# apply does what it should +# applicator generates vector (named / unnamed) +# applicator generates matrix (named / unnamed) +# error for applicator generates list +# applicator gibt nicht kompatible nrow? +# empty task +# task with one row test_that("apply results look as they should", { From 10320ae66ddd1394b8e31631db8604d64ea0d4e8 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 27 Jul 2024 11:55:57 +0200 Subject: [PATCH 05/15] WIP tests --- R/PipeOpRowApply.R | 31 +++---- tests/testthat/test_pipeop_rowapply.R | 119 +++++++++++--------------- 2 files changed, 62 insertions(+), 88 deletions(-) diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index 61181a6b0..b15306c0e 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -84,35 +84,26 @@ PipeOpRowApply = R6Class("PipeOpRowApply", # FIXME: if user replaces this to be NULL, throws error later (defend against?) col_prefix = self$param_set$values$col_prefix cnames = colnames(dt) - nc = ncol(dt) - nr = nrow(dt) res = apply(dt, 1, applicator) - - - # either change is.atomic check - # or do explicit check of output -> look for precedents in code - - if (is.list(res)) stop("Applicator generates a list. Applicator should generate either atomic vector or matrix.") - + if (!(test_atomic_vector(res) | test_matrix(res))) { + stop("Apply with FUN = applicator and simplified = TRUE should generate either atomic vector or matrix.") + } # convert res into matrix to allow identical handling of column name(s) if (test_atomic_vector(res)) { res = matrix(res, nrow = 1) # nrow for faciliation of t() later } + # matrix needs to be transposed for correct dimensions of Task + res = t(res) - if (is.matrix(res)) { - # matrix needs to be transposed for correct dimensions - res = t(res) - # for unnamed matrix use either original column names or generate names - if (is.null(colnames(res))) { - if (ncol(res) == nc) { - colnames(res) = cnames - } else { - colnames(res) = paste0("V", seq_along(ncol(res))) - } + # for unnamed matrix use either original column names or generate names + if (is.null(colnames(res))) { + if (ncol(res) == ncol(dt)) { + colnames(res) = cnames + } else { + colnames(res) = paste0("V", seq_len(ncol(res))) } } - # add col_prefix if (col_prefix != "") { colnames(res) <- paste(col_prefix, colnames(res), sep = ".") diff --git a/tests/testthat/test_pipeop_rowapply.R b/tests/testthat/test_pipeop_rowapply.R index 3b6f5054a..b139abf44 100644 --- a/tests/testthat/test_pipeop_rowapply.R +++ b/tests/testthat/test_pipeop_rowapply.R @@ -8,120 +8,100 @@ test_that("PipeOpRowApply - basic properties", { expect_datapreproc_pipeop_class(PipeOpRowApply, task = task) + expect_equal(op$train(list(task))[[1]]$nrow, task$nrow) + expect_equal(op$predict(list(task))[[1]]$nrow, task$nrow) + }) -test_that("PipeOpRowApply - transform works as intended") +test_that("PipeOpRowApply - transform works on task with only numeric features", { - # general test op = PipeOpRowApply$new() task = mlr_tasks$get("iris") - op$param_set$values$applicator = sum + # applicator generates matrix with names + applicator = function(x) x^2 + op$param_set$values$applicator = applicator expect_equal( op$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), - as.data.table(apply(iris[1:4], 1, sum)) - ) - -# general tests -# apply does what it should -# applicator generates vector (named / unnamed) -# applicator generates matrix (named / unnamed) -# error for applicator generates list -# applicator gibt nicht kompatible nrow? -# empty task -# task with one row - - -test_that("apply results look as they should", { - - po = PipeOpRowApply$new() - task = mlr_tasks$get("iris") - - po$param_set$values = list(applicator = as.character) - - expect_equal( - po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), - + as.data.table(t(apply(iris[1:4], 1, applicator))) ) - expect_equal( - po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), - + op$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + as.data.table(t(apply(iris[1:4], 1, applicator))) ) - po$param_set$values = list(applicator = function(x) x^2) + # applicator generates matrix without names but same number of columns (should keep names) + applicator = scale + op$param_set$values$applicator = applicator + result = as.data.table(t(apply(iris[1:4], 1, applicator))) + setNames(result, colnames(iris[1:4])) expect_equal( - po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), - + op$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + result ) - expect_equal( - po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), - + op$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + result ) - expect_equal( - po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + # applicator generates matrix without names but different number of columns (should generate new names) + applicator = function(x) rep(sum(x), 2) + op$param_set$values$applicator = applicator + expect_equal( + op$train(list(task))[[1]]$data(cols = c("V1", "V2")), + as.data.table(t(apply(iris[1:4], 1, applicator))) ) - expect_equal( - po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), - + op$predict(list(task))[[1]]$data(cols = c("V1", "V2")), + as.data.table(t(apply(iris[1:4], 1, applicator))) ) - tomean = function(x) rep(mean(x), length(x)) - po$param_set$values = list(applicator = tomean) + # applicator generates vector (should generate new name) + applicator = sum + op$param_set$values$applicator = applicator expect_equal( - po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), - + op$train(list(task))[[1]]$data(cols = "V1"), + as.data.table(t(matrix(apply(iris[1:4], 1, applicator), nrow = 1))) ) - expect_equal( - po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), - + op$predict(list(task))[[1]]$data(cols = "V1"), + as.data.table(t(matrix(apply(iris[1:4], 1, applicator), nrow = 1))) ) - expect_equal( - po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + # error if apply generates anything but a matrix or vector (e.g. non-simplifiable list) + applicator = function(x) if(mean(x) < 3) c(x[[1]], x[[2]]) else x[[1]] + op$param_set$values$applicator = applicator - ) + expect_error(op$train(list(task))) + expect_error(op$predict(list(task))) - expect_equal( - po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + # col_prefix - ) - po$param_set$values = list(applicator = as.character, affect_columns = selector_grep("^Sepal")) +}) - expect_equal( - po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), - ) +test_that("PipeOpRowApply - transform works on task with only integer features", { - expect_equal( - po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), +}) - ) - po$param_set$values = list(applicator = Vectorize(as.character), affect_columns = selector_grep("^Sepal")) +test_that("PipeOpRowApply - transform works on task with both numeric and integer features", { - expect_equal( - po$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), +}) - ) - expect_equal( - po$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), +test_that("PipeOpRowApply - transform works on task with only one row", { - ) }) -test_that("empty task", { + +test_that("PipeOpRowApply - transform works on empty task", { task = tsk("iris")$filter(0L) po = PipeOpRowApply$new() @@ -134,4 +114,7 @@ test_that("empty task", { predict_out = po$predict(list(task))[[1L]] expect_data_table(predict_out$data(), nrows = 0L) expect_true(all(predict_out$feature_types$type == "integer")) + }) + + From 1664d6e947b95869dcbbac3466ac3235343b294b Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 27 Jul 2024 15:45:12 +0200 Subject: [PATCH 06/15] decision path, how to handle 0 rows --- R/PipeOpRowApply.R | 5 +- tests/testthat/test_pipeop_rowapply.R | 343 +++++++++++++++++++++++++- 2 files changed, 341 insertions(+), 7 deletions(-) diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index b15306c0e..874606b33 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -80,6 +80,9 @@ PipeOpRowApply = R6Class("PipeOpRowApply", }, .transform_dt = function(dt, levels) { + # handle dt with no rows, return unchanged + if (nrow(dt) == 0) return(dt) + applicator = self$param_set$values$applicator # FIXME: if user replaces this to be NULL, throws error later (defend against?) col_prefix = self$param_set$values$col_prefix @@ -89,7 +92,7 @@ PipeOpRowApply = R6Class("PipeOpRowApply", if (!(test_atomic_vector(res) | test_matrix(res))) { stop("Apply with FUN = applicator and simplified = TRUE should generate either atomic vector or matrix.") } - # convert res into matrix to allow identical handling of column name(s) + # convert res into matrix for identical handling of column names if (test_atomic_vector(res)) { res = matrix(res, nrow = 1) # nrow for faciliation of t() later } diff --git a/tests/testthat/test_pipeop_rowapply.R b/tests/testthat/test_pipeop_rowapply.R index b139abf44..1dbdc10b2 100644 --- a/tests/testthat/test_pipeop_rowapply.R +++ b/tests/testthat/test_pipeop_rowapply.R @@ -8,12 +8,9 @@ test_that("PipeOpRowApply - basic properties", { expect_datapreproc_pipeop_class(PipeOpRowApply, task = task) - expect_equal(op$train(list(task))[[1]]$nrow, task$nrow) - expect_equal(op$predict(list(task))[[1]]$nrow, task$nrow) - }) -test_that("PipeOpRowApply - transform works on task with only numeric features", { +test_that("PipeOpRowApply - transform on task with only numeric features", { op = PipeOpRowApply$new() task = mlr_tasks$get("iris") @@ -35,7 +32,7 @@ test_that("PipeOpRowApply - transform works on task with only numeric features", applicator = scale op$param_set$values$applicator = applicator result = as.data.table(t(apply(iris[1:4], 1, applicator))) - setNames(result, colnames(iris[1:4])) + setnames(result, colnames(iris[1:4])) expect_equal( op$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), @@ -59,7 +56,6 @@ test_that("PipeOpRowApply - transform works on task with only numeric features", as.data.table(t(apply(iris[1:4], 1, applicator))) ) - # applicator generates vector (should generate new name) applicator = sum op$param_set$values$applicator = applicator @@ -73,6 +69,29 @@ test_that("PipeOpRowApply - transform works on task with only numeric features", as.data.table(t(matrix(apply(iris[1:4], 1, applicator), nrow = 1))) ) + # applicator is as.integer + applicator = as.integer + op$param_set$values$applicator = applicator + result = as.data.table(t(apply(iris[1:4], 1, applicator))) + setnames(result, colnames(iris[1:4])) + + expect_equal( + op$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + result + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + result + ) + + # predict task has 0 rows + task_predict = task$filter(0L) + + expect_equal( + op$predict(list(task_predict))[[1]]$data(cols = cnames), + + ) + # error if apply generates anything but a matrix or vector (e.g. non-simplifiable list) applicator = function(x) if(mean(x) < 3) c(x[[1]], x[[2]]) else x[[1]] op$param_set$values$applicator = applicator @@ -81,23 +100,335 @@ test_that("PipeOpRowApply - transform works on task with only numeric features", expect_error(op$predict(list(task))) # col_prefix + op$param_set$values$applicator = function(x) x^2 + op$param_set$values$col_prefix = "applied" + cnames = paste("applied", colnames(iris[1:4]), sep = ".") + expect_equal( + op$train(list(task))[[1]]$feature_names, + cnames + ) + expect_equal( + op$predict(list(task))[[1]]$feature_names, + cnames + ) }) test_that("PipeOpRowApply - transform works on task with only integer features", { + op = PipeOpRowApply$new() + task = mlr_tasks$get("german_credit") + cnames = c("age", "amount", "duration") + task$select(cnames) + german_credit = task$data(cols = cnames) + + # applicator generates matrix with names + applicator = function(x) x^2 + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(cols = cnames), + as.data.table(t(apply(german_credit, 1, applicator))) + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = cnames), + as.data.table(t(apply(german_credit, 1, applicator))) + ) + + # applicator generates matrix without names but same number of columns (should keep names) + applicator = scale + op$param_set$values$applicator = applicator + result = as.data.table(t(apply(german_credit, 1, applicator))) + setnames(result, cnames) + + expect_equal( + op$train(list(task))[[1]]$data(cols = cnames), + result + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = cnames), + result + ) + + # applicator generates matrix without names but different number of columns (should generate new names) + applicator = function(x) rep(sum(x), 2) + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(cols = c("V1", "V2")), + as.data.table(t(apply(german_credit, 1, applicator))) + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = c("V1", "V2")), + as.data.table(t(apply(german_credit, 1, applicator))) + ) + + # applicator generates vector (should generate new name) + applicator = sum + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(cols = "V1"), + as.data.table(t(matrix(apply(german_credit, 1, applicator), nrow = 1))) + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = "V1"), + as.data.table(t(matrix(apply(german_credit, 1, applicator), nrow = 1))) + ) + + # applicator is as.numeric + applicator = as.integer + op$param_set$values$applicator = applicator + result = as.data.table(t(apply(german_credit, 1, applicator))) + setnames(result, cnames) + + expect_equal( + op$train(list(task))[[1]]$data(cols = cnames), + result + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = cnames), + result + ) + + # predict task has 0 rows + task_predict = task$filter(0L) + + expect_equal( + op$predict(list(task_predict))[[1]]$data(cols = cnames), + + ) + + # error if apply generates anything but a matrix or vector (e.g. non-simplifiable list) + applicator = function(x) if(mean(x) < 1000) c(x[[1]], x[[2]]) else x[[1]] + op$param_set$values$applicator = applicator + + expect_error(op$train(list(task))) + expect_error(op$predict(list(task))) + + # col_prefix + op$param_set$values$applicator = function(x) x^2 + op$param_set$values$col_prefix = "applied" + cnames = paste("applied", cnames, sep = ".") + + expect_equal( + op$train(list(task))[[1]]$feature_names, + cnames + ) + expect_equal( + op$predict(list(task))[[1]]$feature_names, + cnames + ) + }) test_that("PipeOpRowApply - transform works on task with both numeric and integer features", { + op = PipeOpRowApply$new() + task = mlr_tasks$get("wine") + cnames = task$feature_names + wine = task$data(cols = cnames) + + # applicator generates matrix with names + applicator = function(x) x^2 + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(cols = cnames), + as.data.table(t(apply(wine, 1, applicator))) + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = cnames), + as.data.table(t(apply(wine, 1, applicator))) + ) + + # applicator generates matrix without names but same number of columns (should keep names) + applicator = scale + op$param_set$values$applicator = applicator + result = as.data.table(t(apply(wine, 1, applicator))) + setnames(result, cnames) + + expect_equal( + op$train(list(task))[[1]]$data(cols = cnames), + result + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = cnames), + result + ) + + # applicator generates matrix without names but different number of columns (should generate new names) + applicator = function(x) rep(sum(x), 2) + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(cols = c("V1", "V2")), + as.data.table(t(apply(wine, 1, applicator))) + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = c("V1", "V2")), + as.data.table(t(apply(wine, 1, applicator))) + ) + + # applicator generates vector (should generate new name) + applicator = sum + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(cols = "V1"), + as.data.table(t(matrix(apply(wine, 1, applicator), nrow = 1))) + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = "V1"), + as.data.table(t(matrix(apply(wine, 1, applicator), nrow = 1))) + ) + + # applicator is as.integer + applicator = as.integer + op$param_set$values$applicator = applicator + result = as.data.table(t(apply(wine, 1, applicator))) + setnames(result, cnames) + + expect_equal( + op$train(list(task))[[1]]$data(cols = cnames), + result + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = cnames), + result + ) + + # applicator is as.numeric + applicator = as.numeric + op$param_set$values$applicator = applicator + result = as.data.table(t(apply(wine, 1, applicator))) + setnames(result, cnames) + + expect_equal( + op$train(list(task))[[1]]$data(cols = cnames), + result + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = cnames), + result + ) + + # predict task has 0 rows + task_predict = task$filter(0L) + + expect_equal( + op$predict(list(task_predict))[[1]]$data(cols = cnames), + + ) + + # error if apply generates anything but a matrix or vector (e.g. non-simplifiable list) + applicator = function(x) if(mean(x) < 50) c(x[[1]], x[[2]]) else x[[1]] + op$param_set$values$applicator = applicator + + expect_error(op$train(list(task))) + expect_error(op$predict(list(task))) + + # col_prefix + op$param_set$values$applicator = function(x) x^2 + op$param_set$values$col_prefix = "applied" + cnames = paste("applied", cnames, sep = ".") + + expect_equal( + op$train(list(task))[[1]]$feature_names, + cnames + ) + expect_equal( + op$predict(list(task))[[1]]$feature_names, + cnames + ) + }) test_that("PipeOpRowApply - transform works on task with only one row", { + op = PipeOpRowApply$new() + task = mlr_tasks$get("wine")$filter(1) + cnames = task$feature_names + wine = task$data(cols = cnames) + + # applicator generates matrix with names + applicator = function(x) x^2 + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(cols = cnames), + as.data.table(t(apply(wine, 1, applicator))) + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = cnames), + as.data.table(t(apply(wine, 1, applicator))) + ) + + # applicator generates matrix without names but same number of columns (should keep names) + applicator = scale + op$param_set$values$applicator = applicator + result = as.data.table(t(apply(wine, 1, applicator))) + setnames(result, cnames) + + expect_equal( + op$train(list(task))[[1]]$data(cols = cnames), + result + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = cnames), + result + ) + + # applicator generates matrix without names but different number of columns (should generate new names) + applicator = function(x) rep(sum(x), 2) + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(cols = c("V1", "V2")), + as.data.table(t(apply(wine, 1, applicator))) + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = c("V1", "V2")), + as.data.table(t(apply(wine, 1, applicator))) + ) + + # applicator generates vector (should generate new name) + applicator = sum + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(cols = "V1"), + as.data.table(t(matrix(apply(wine, 1, applicator), nrow = 1))) + ) + expect_equal( + op$predict(list(task))[[1]]$data(cols = "V1"), + as.data.table(t(matrix(apply(wine, 1, applicator), nrow = 1))) + ) + +}) + +test_that("PipeOpRowApply - transform works on empty task (no rows)", { + + op = PipeOpRowApply$new() + task = mlr_tasks$get("wine")$filter(0) + + # applicator is as.integer + applicator = as.integer + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(), + task$data() + ) + expect_equal( + op$predict(list(task))[[1]]$data(), + task$data() + ) + }) From 035a021e851f8f369390634c654769b7a977c78b Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 27 Jul 2024 17:53:40 +0200 Subject: [PATCH 07/15] WIP tests, before structuring --- R/PipeOpRowApply.R | 15 ++++++++++++--- tests/testthat/test_pipeop_rowapply.R | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index 874606b33..9a0792753 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -80,14 +80,17 @@ PipeOpRowApply = R6Class("PipeOpRowApply", }, .transform_dt = function(dt, levels) { - # handle dt with no rows, return unchanged - if (nrow(dt) == 0) return(dt) - applicator = self$param_set$values$applicator # FIXME: if user replaces this to be NULL, throws error later (defend against?) col_prefix = self$param_set$values$col_prefix cnames = colnames(dt) + # handle dt with 0 rows: give filler content to find out what applicator does + if (nrow(dt) == 0) { + dt = dt[NA_integer_] + was_empty = TRUE + } + res = apply(dt, 1, applicator) if (!(test_atomic_vector(res) | test_matrix(res))) { stop("Apply with FUN = applicator and simplified = TRUE should generate either atomic vector or matrix.") @@ -111,6 +114,12 @@ PipeOpRowApply = R6Class("PipeOpRowApply", if (col_prefix != "") { colnames(res) <- paste(col_prefix, colnames(res), sep = ".") } + + # handle dt with 0 rows: remove filler content + if (was_empty == TRUE) { + res = res[0L, ] + } + res } ) diff --git a/tests/testthat/test_pipeop_rowapply.R b/tests/testthat/test_pipeop_rowapply.R index 1dbdc10b2..ca7b5b8ef 100644 --- a/tests/testthat/test_pipeop_rowapply.R +++ b/tests/testthat/test_pipeop_rowapply.R @@ -417,7 +417,7 @@ test_that("PipeOpRowApply - transform works on empty task (no rows)", { task = mlr_tasks$get("wine")$filter(0) # applicator is as.integer - applicator = as.integer + applicator = function(x) rep(mean(x), 2) op$param_set$values$applicator = applicator expect_equal( From 18aac8139e0acf91a05104a13056fa8f9e3c2c9c Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 27 Jul 2024 18:45:31 +0200 Subject: [PATCH 08/15] tests finished --- R/PipeOpRowApply.R | 10 +- tests/testthat/test_pipeop_rowapply.R | 264 +++++++++++++------------- 2 files changed, 136 insertions(+), 138 deletions(-) diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index 9a0792753..dd08e55c0 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -7,7 +7,7 @@ #' @description #' Applies a function to each row of a task. Use the `affect_columns` parameter inherited from #' [`PipeOpTaskPreprocSimple`] to limit the columns this function should be applied to. This can be used -#' for row-wise normalization. +#' for row-wise normalization or creation of new columns from values per row in general. #' The same function is applied during training and prediction. #' #' @section Construction: @@ -38,6 +38,8 @@ #' The names of the resulting features of the output [`Task`][mlr3::Task] is based on the (column) name(s) of the return value of the applicator function, #' prefixed with the original feature name separated by a dot (`.`). #' Use [`Vectorize`][base::Vectorize] to create a vectorizing function from any function that ordinarily only takes one element input.\cr +#' * `col_prefix` :: `character`\cr +#' Optional. Character vector of length one as prefix for newly generated columns. #' #' @section Internals: #' Calls [`apply`] on the data, using the value of `applicator` as `FUN` and coerces the output via [`as.data.table`]. @@ -56,8 +58,8 @@ #' library("mlr3") #' #' task = tsk("iris") -#' poca = po("rowapply", applicator = scale) -#' poca$train(list(task))[[1]] # rows are standardized +#' pora = po("rowapply", applicator = scale) +#' pora$train(list(task))[[1]] # rows are standardized PipeOpRowApply = R6Class("PipeOpRowApply", inherit = PipeOpTaskPreprocSimple, public = list( @@ -89,6 +91,8 @@ PipeOpRowApply = R6Class("PipeOpRowApply", if (nrow(dt) == 0) { dt = dt[NA_integer_] was_empty = TRUE + } else { + was_empty = FALSE } res = apply(dt, 1, applicator) diff --git a/tests/testthat/test_pipeop_rowapply.R b/tests/testthat/test_pipeop_rowapply.R index ca7b5b8ef..50cc9f718 100644 --- a/tests/testthat/test_pipeop_rowapply.R +++ b/tests/testthat/test_pipeop_rowapply.R @@ -14,32 +14,34 @@ test_that("PipeOpRowApply - transform on task with only numeric features", { op = PipeOpRowApply$new() task = mlr_tasks$get("iris") + cnames = task$feature_names + iris = task$data(cols = cnames) # applicator generates matrix with names applicator = function(x) x^2 op$param_set$values$applicator = applicator expect_equal( - op$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), - as.data.table(t(apply(iris[1:4], 1, applicator))) + op$train(list(task))[[1]]$data(cols = cnames), + as.data.table(t(apply(iris, 1, applicator))) ) expect_equal( - op$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), - as.data.table(t(apply(iris[1:4], 1, applicator))) + op$predict(list(task))[[1]]$data(cols = cnames), + as.data.table(t(apply(iris, 1, applicator))) ) # applicator generates matrix without names but same number of columns (should keep names) - applicator = scale + applicator = as.integer op$param_set$values$applicator = applicator - result = as.data.table(t(apply(iris[1:4], 1, applicator))) - setnames(result, colnames(iris[1:4])) + result = as.data.table(t(apply(iris, 1, applicator))) + setnames(result, cnames) expect_equal( - op$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), + op$train(list(task))[[1]]$data(cols = cnames), result ) expect_equal( - op$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), + op$predict(list(task))[[1]]$data(cols = cnames), result ) @@ -49,11 +51,11 @@ test_that("PipeOpRowApply - transform on task with only numeric features", { expect_equal( op$train(list(task))[[1]]$data(cols = c("V1", "V2")), - as.data.table(t(apply(iris[1:4], 1, applicator))) + as.data.table(t(apply(iris, 1, applicator))) ) expect_equal( op$predict(list(task))[[1]]$data(cols = c("V1", "V2")), - as.data.table(t(apply(iris[1:4], 1, applicator))) + as.data.table(t(apply(iris, 1, applicator))) ) # applicator generates vector (should generate new name) @@ -62,34 +64,11 @@ test_that("PipeOpRowApply - transform on task with only numeric features", { expect_equal( op$train(list(task))[[1]]$data(cols = "V1"), - as.data.table(t(matrix(apply(iris[1:4], 1, applicator), nrow = 1))) + as.data.table(t(matrix(apply(iris, 1, applicator), nrow = 1))) ) expect_equal( op$predict(list(task))[[1]]$data(cols = "V1"), - as.data.table(t(matrix(apply(iris[1:4], 1, applicator), nrow = 1))) - ) - - # applicator is as.integer - applicator = as.integer - op$param_set$values$applicator = applicator - result = as.data.table(t(apply(iris[1:4], 1, applicator))) - setnames(result, colnames(iris[1:4])) - - expect_equal( - op$train(list(task))[[1]]$data(cols = colnames(iris[1:4])), - result - ) - expect_equal( - op$predict(list(task))[[1]]$data(cols = colnames(iris[1:4])), - result - ) - - # predict task has 0 rows - task_predict = task$filter(0L) - - expect_equal( - op$predict(list(task_predict))[[1]]$data(cols = cnames), - + as.data.table(t(matrix(apply(iris, 1, applicator), nrow = 1))) ) # error if apply generates anything but a matrix or vector (e.g. non-simplifiable list) @@ -102,16 +81,10 @@ test_that("PipeOpRowApply - transform on task with only numeric features", { # col_prefix op$param_set$values$applicator = function(x) x^2 op$param_set$values$col_prefix = "applied" - cnames = paste("applied", colnames(iris[1:4]), sep = ".") + cnames = paste("applied", task$feature_names, sep = ".") - expect_equal( - op$train(list(task))[[1]]$feature_names, - cnames - ) - expect_equal( - op$predict(list(task))[[1]]$feature_names, - cnames - ) + expect_equal(op$train(list(task))[[1]]$feature_names, cnames) + expect_equal(op$predict(list(task))[[1]]$feature_names, cnames) }) @@ -138,7 +111,7 @@ test_that("PipeOpRowApply - transform works on task with only integer features", ) # applicator generates matrix without names but same number of columns (should keep names) - applicator = scale + applicator = as.numeric op$param_set$values$applicator = applicator result = as.data.table(t(apply(german_credit, 1, applicator))) setnames(result, cnames) @@ -178,29 +151,6 @@ test_that("PipeOpRowApply - transform works on task with only integer features", as.data.table(t(matrix(apply(german_credit, 1, applicator), nrow = 1))) ) - # applicator is as.numeric - applicator = as.integer - op$param_set$values$applicator = applicator - result = as.data.table(t(apply(german_credit, 1, applicator))) - setnames(result, cnames) - - expect_equal( - op$train(list(task))[[1]]$data(cols = cnames), - result - ) - expect_equal( - op$predict(list(task))[[1]]$data(cols = cnames), - result - ) - - # predict task has 0 rows - task_predict = task$filter(0L) - - expect_equal( - op$predict(list(task_predict))[[1]]$data(cols = cnames), - - ) - # error if apply generates anything but a matrix or vector (e.g. non-simplifiable list) applicator = function(x) if(mean(x) < 1000) c(x[[1]], x[[2]]) else x[[1]] op$param_set$values$applicator = applicator @@ -213,14 +163,8 @@ test_that("PipeOpRowApply - transform works on task with only integer features", op$param_set$values$col_prefix = "applied" cnames = paste("applied", cnames, sep = ".") - expect_equal( - op$train(list(task))[[1]]$feature_names, - cnames - ) - expect_equal( - op$predict(list(task))[[1]]$feature_names, - cnames - ) + expect_equal(op$train(list(task))[[1]]$feature_names, cnames) + expect_equal(op$predict(list(task))[[1]]$feature_names, cnames) }) @@ -246,7 +190,7 @@ test_that("PipeOpRowApply - transform works on task with both numeric and intege ) # applicator generates matrix without names but same number of columns (should keep names) - applicator = scale + applicator = as.integer op$param_set$values$applicator = applicator result = as.data.table(t(apply(wine, 1, applicator))) setnames(result, cnames) @@ -286,44 +230,6 @@ test_that("PipeOpRowApply - transform works on task with both numeric and intege as.data.table(t(matrix(apply(wine, 1, applicator), nrow = 1))) ) - # applicator is as.integer - applicator = as.integer - op$param_set$values$applicator = applicator - result = as.data.table(t(apply(wine, 1, applicator))) - setnames(result, cnames) - - expect_equal( - op$train(list(task))[[1]]$data(cols = cnames), - result - ) - expect_equal( - op$predict(list(task))[[1]]$data(cols = cnames), - result - ) - - # applicator is as.numeric - applicator = as.numeric - op$param_set$values$applicator = applicator - result = as.data.table(t(apply(wine, 1, applicator))) - setnames(result, cnames) - - expect_equal( - op$train(list(task))[[1]]$data(cols = cnames), - result - ) - expect_equal( - op$predict(list(task))[[1]]$data(cols = cnames), - result - ) - - # predict task has 0 rows - task_predict = task$filter(0L) - - expect_equal( - op$predict(list(task_predict))[[1]]$data(cols = cnames), - - ) - # error if apply generates anything but a matrix or vector (e.g. non-simplifiable list) applicator = function(x) if(mean(x) < 50) c(x[[1]], x[[2]]) else x[[1]] op$param_set$values$applicator = applicator @@ -411,41 +317,129 @@ test_that("PipeOpRowApply - transform works on task with only one row", { }) + test_that("PipeOpRowApply - transform works on empty task (no rows)", { op = PipeOpRowApply$new() task = mlr_tasks$get("wine")$filter(0) + cnames = task$feature_names - # applicator is as.integer - applicator = function(x) rep(mean(x), 2) + # applicator generates matrix with names + applicator = function(x) x^2 op$param_set$values$applicator = applicator - expect_equal( - op$train(list(task))[[1]]$data(), - task$data() - ) - expect_equal( - op$predict(list(task))[[1]]$data(), - task$data() - ) + train_out = op$train(list(task))[[1]] + expect_data_table(train_out$data(), nrows = 0) + expect_set_equal(train_out$feature_names, cnames) -}) + predict_out = op$predict(list(task))[[1]] + expect_data_table(predict_out$data(), nrows = 0) + expect_set_equal(predict_out$feature_names, cnames) + + # applicator generates matrix without names but same number of columns (should keep names) + applicator = scale + op$param_set$values$applicator = applicator + + train_out = op$train(list(task))[[1]] + expect_data_table(train_out$data(), nrows = 0) + expect_set_equal(train_out$feature_names, cnames) + + predict_out = op$predict(list(task))[[1]] + expect_data_table(predict_out$data(), nrows = 0) + expect_set_equal(predict_out$feature_names, cnames) + + # applicator generates matrix without names but different number of columns (should generate new names) + applicator = function(x) rep(sum(x), 2) + op$param_set$values$applicator = applicator + train_out = op$train(list(task))[[1]] + expect_data_table(train_out$data(), nrows = 0) + expect_set_equal(train_out$feature_names, c("V1", "V2")) -test_that("PipeOpRowApply - transform works on empty task", { + predict_out = op$predict(list(task))[[1]] + expect_data_table(predict_out$data(), nrows = 0) + expect_set_equal(predict_out$feature_names, c("V1", c("V2"))) - task = tsk("iris")$filter(0L) - po = PipeOpRowApply$new() - po$param_set$values$applicator = function(x) as.integer(x) + # applicator generates vector (should generate new name) + applicator = sum + op$param_set$values$applicator = applicator + + train_out = op$train(list(task))[[1]] + expect_data_table(train_out$data(), nrows = 0) + expect_set_equal(train_out$feature_names, "V1") + + predict_out = op$predict(list(task))[[1]] + expect_data_table(predict_out$data(), nrows = 0) + expect_set_equal(predict_out$feature_names, "V1") - train_out = po$train(list(task))[[1L]] - expect_data_table(train_out$data(), nrows = 0L) + # applicator changes feature type + applicator = as.integer + op$param_set$values$applicator = applicator + + train_out = op$train(list(task))[[1]] + expect_data_table(train_out$data(), nrows = 0) + expect_set_equal(train_out$feature_names, cnames) expect_true(all(train_out$feature_types$type == "integer")) - predict_out = po$predict(list(task))[[1L]] - expect_data_table(predict_out$data(), nrows = 0L) + predict_out = op$predict(list(task))[[1]] + expect_data_table(predict_out$data(), nrows = 0) + expect_set_equal(predict_out$feature_names, cnames) expect_true(all(predict_out$feature_types$type == "integer")) }) +test_that("PipeOpRowApply - transform works for only empty predict task (no rows)", { + + op = PipeOpRowApply$new() + task_train = mlr_tasks$get("wine") + task_predict = task$filter(0) + cnames = task$feature_names + + # applicator generates matrix with names + applicator = function(x) x^2 + op$param_set$values$applicator = applicator + + op$train(list(task_train)) + predict_out = op$predict(list(task_predict))[[1]] + expect_data_table(predict_out$data(), nrows = 0) + expect_set_equal(predict_out$feature_names, cnames) + + # applicator generates matrix without names but same number of columns (should keep names) + applicator = scale + op$param_set$values$applicator = applicator + + op$train(list(task_train)) + predict_out = op$predict(list(task_predict))[[1]] + expect_data_table(predict_out$data(), nrows = 0) + expect_set_equal(predict_out$feature_names, cnames) + + # applicator generates matrix without names but different number of columns (should generate new names) + applicator = function(x) rep(sum(x), 2) + op$param_set$values$applicator = applicator + + op$train(list(task_train)) + predict_out = op$predict(list(task_predict))[[1]] + expect_data_table(predict_out$data(), nrows = 0) + expect_set_equal(predict_out$feature_names, c("V1", c("V2"))) + + # applicator generates vector (should generate new name) + applicator = sum + op$param_set$values$applicator = applicator + + op$train(list(task_train)) + predict_out = op$predict(list(task_predict))[[1]] + expect_data_table(predict_out$data(), nrows = 0) + expect_set_equal(predict_out$feature_names, "V1") + + # applicator changes feature type + applicator = as.integer + op$param_set$values$applicator = applicator + + op$train(list(task_train)) + predict_out = op$predict(list(task_predict))[[1]] + expect_data_table(predict_out$data(), nrows = 0) + expect_set_equal(predict_out$feature_names, cnames) + expect_true(all(predict_out$feature_types$type == "integer")) + +}) From 1bd46b92662a855a0df5c0beaf213e58dc24e619 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 27 Jul 2024 18:58:08 +0200 Subject: [PATCH 09/15] improved comments --- R/PipeOpRowApply.R | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index dd08e55c0..ea0012336 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -83,11 +83,10 @@ PipeOpRowApply = R6Class("PipeOpRowApply", .transform_dt = function(dt, levels) { applicator = self$param_set$values$applicator - # FIXME: if user replaces this to be NULL, throws error later (defend against?) col_prefix = self$param_set$values$col_prefix cnames = colnames(dt) - # handle dt with 0 rows: give filler content to find out what applicator does + # Handle data table with zero rows by adding filler content to emulate column creation later if (nrow(dt) == 0) { dt = dt[NA_integer_] was_empty = TRUE @@ -99,14 +98,14 @@ PipeOpRowApply = R6Class("PipeOpRowApply", if (!(test_atomic_vector(res) | test_matrix(res))) { stop("Apply with FUN = applicator and simplified = TRUE should generate either atomic vector or matrix.") } - # convert res into matrix for identical handling of column names + # Convert result to a matrix for consistent column name handling if (test_atomic_vector(res)) { - res = matrix(res, nrow = 1) # nrow for faciliation of t() later + res = matrix(res, nrow = 1) # Ensure matrix has one row for correct transposition } - # matrix needs to be transposed for correct dimensions of Task + # Transpose the matrix for correct Task dimensions res = t(res) - # for unnamed matrix use either original column names or generate names + # Assign column names if they are missing if (is.null(colnames(res))) { if (ncol(res) == ncol(dt)) { colnames(res) = cnames @@ -114,12 +113,12 @@ PipeOpRowApply = R6Class("PipeOpRowApply", colnames(res) = paste0("V", seq_len(ncol(res))) } } - # add col_prefix + # Prepend column prefix if specified if (col_prefix != "") { colnames(res) <- paste(col_prefix, colnames(res), sep = ".") } - # handle dt with 0 rows: remove filler content + # Remove filler content if the original data.table had zero rows if (was_empty == TRUE) { res = res[0L, ] } From 9553a3d15ec2229a8acdad27f33867d5573d1437 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 28 Jul 2024 16:28:51 +0200 Subject: [PATCH 10/15] DOCU RowApply --- R/PipeOpRowApply.R | 26 +++--- man/mlr_pipeops_rowapply.Rd | 155 ++++++++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 14 deletions(-) create mode 100644 man/mlr_pipeops_rowapply.Rd diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index ea0012336..9e34d5e62 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -6,9 +6,7 @@ #' #' @description #' Applies a function to each row of a task. Use the `affect_columns` parameter inherited from -#' [`PipeOpTaskPreprocSimple`] to limit the columns this function should be applied to. This can be used -#' for row-wise normalization or creation of new columns from values per row in general. -#' The same function is applied during training and prediction. +#' [`PipeOpTaskPreprocSimple`] to limit the columns this function should be applied to. #' #' @section Construction: #' ``` @@ -22,7 +20,8 @@ #' @section Input and Output Channels: #' Input and output channels are inherited from [`PipeOpTaskPreprocSimple`]. #' -#' The output is the input [`Task`][mlr3::Task] with rows changed according to the `applicator` parameter. +#' The output is the input [`Task`][mlr3::Task] with the original affected columns replaced by the columns created by +#' applying `applicator` to each row. #' #' @section State: #' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreprocSimple`]. @@ -30,19 +29,18 @@ #' @section Parameters: #' The parameters are the parameters inherited from [`PipeOpTaskPreprocSimple`], as well as: #' * `applicator` :: `function`\cr -#' Function to apply to each row of the task. +#' Function to apply to each row in the affected columns of the task. #' The return value should be a `vector` of the same length as the input, i.e., the function vectorizes over the input. -#' A typical example would be `scale`.\cr -#' The return value can also be a `matrix`, `data.frame`, or [`data.table`][data.table::data.table]. +#' Alternatively, the return value can be a `matrix`, `data.frame`, or [`data.table`][data.table::data.table]. #' In this case, the length of the input must match the number of returned rows. -#' The names of the resulting features of the output [`Task`][mlr3::Task] is based on the (column) name(s) of the return value of the applicator function, -#' prefixed with the original feature name separated by a dot (`.`). -#' Use [`Vectorize`][base::Vectorize] to create a vectorizing function from any function that ordinarily only takes one element input.\cr -#' * `col_prefix` :: `character`\cr -#' Optional. Character vector of length one as prefix for newly generated columns. +#' Use [`Vectorize`][base::Vectorize] to create a vectorizing function from any function that ordinarily only takes one element input. +#' Default is [identity()][base::identity] +#' * `col_prefix` :: `character(1)`\cr +#' If specified, prefix to be prepended to the column names of affected columns, separated by a dot (`.`). Default is `character(1)`. #' #' @section Internals: -#' Calls [`apply`] on the data, using the value of `applicator` as `FUN` and coerces the output via [`as.data.table`]. +#' Calls [`apply`] on the data, using the value of `applicator` as `FUN` and `simplify = TRUE`, then coerces the output via +#' [`as.data.table()`][data.table::as.data.table]. #' #' @section Fields: #' Only fields inherited from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. @@ -88,7 +86,7 @@ PipeOpRowApply = R6Class("PipeOpRowApply", # Handle data table with zero rows by adding filler content to emulate column creation later if (nrow(dt) == 0) { - dt = dt[NA_integer_] + dt = dt[NA_integer_] # Adds emtpy row was_empty = TRUE } else { was_empty = FALSE diff --git a/man/mlr_pipeops_rowapply.Rd b/man/mlr_pipeops_rowapply.Rd new file mode 100644 index 000000000..ca1f89e5e --- /dev/null +++ b/man/mlr_pipeops_rowapply.Rd @@ -0,0 +1,155 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpRowApply.R +\name{mlr_pipeops_rowapply} +\alias{mlr_pipeops_rowapply} +\alias{PipeOpRowApply} +\title{Apply a Function to each Row of a Task} +\format{ +\code{\link[R6:R6Class]{R6Class}} object inheriting from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} +\description{ +Applies a function to each row of a task. Use the \code{affect_columns} parameter inherited from +\code{\link{PipeOpTaskPreprocSimple}} to limit the columns this function should be applied to. +} +\section{Construction}{ + + +\if{html}{\out{
}}\preformatted{PipeOpColApply$new(id = "rowapply", param_vals = list()) +}\if{html}{\out{
}} +\itemize{ +\item \code{id} :: \code{character(1)}\cr +Identifier of resulting object, default \code{"rowapply"}. +\item \code{param_vals} :: named \code{list}\cr +List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}. +} +} + +\section{Input and Output Channels}{ + +Input and output channels are inherited from \code{\link{PipeOpTaskPreprocSimple}}. + +The output is the input \code{\link[mlr3:Task]{Task}} with the original affected columns replaced by the columns created by +applying \code{applicator} to each row. +} + +\section{State}{ + +The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreprocSimple}}. +} + +\section{Parameters}{ + +The parameters are the parameters inherited from \code{\link{PipeOpTaskPreprocSimple}}, as well as: +\itemize{ +\item \code{applicator} :: \code{function}\cr +Function to apply to each row in the affected columns of the task. +The return value should be a \code{vector} of the same length as the input, i.e., the function vectorizes over the input. +Alternatively, the return value can be a \code{matrix}, \code{data.frame}, or \code{\link[data.table:data.table]{data.table}}. +In this case, the length of the input must match the number of returned rows. +Use \code{\link[base:Vectorize]{Vectorize}} to create a vectorizing function from any function that ordinarily only takes one element input. +Default is \link[base:identity]{identity()} +\item \code{col_prefix} :: \code{character(1)}\cr +If specified, prefix to be prepended to the column names of affected columns, separated by a dot (\code{.}). Default is \code{character(1)}. +} +} + +\section{Internals}{ + +Calls \code{\link{apply}} on the data, using the value of \code{applicator} as \code{FUN} and \code{simplify = TRUE}, then coerces the output via +\code{\link[data.table:as.data.table]{as.data.table()}}. +} + +\section{Fields}{ + +Only fields inherited from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\section{Methods}{ + +Only methods inherited from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\examples{ +library("mlr3") + +task = tsk("iris") +pora = po("rowapply", applicator = scale) +pora$train(list(task))[[1]] # rows are standardized +} +\seealso{ +https://mlr-org.com/pipeops.html + +Other PipeOps: +\code{\link{PipeOp}}, +\code{\link{PipeOpEnsemble}}, +\code{\link{PipeOpImpute}}, +\code{\link{PipeOpTargetTrafo}}, +\code{\link{PipeOpTaskPreproc}}, +\code{\link{PipeOpTaskPreprocSimple}}, +\code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_boxcox}}, +\code{\link{mlr_pipeops_branch}}, +\code{\link{mlr_pipeops_chunk}}, +\code{\link{mlr_pipeops_classbalancing}}, +\code{\link{mlr_pipeops_classifavg}}, +\code{\link{mlr_pipeops_classweights}}, +\code{\link{mlr_pipeops_colapply}}, +\code{\link{mlr_pipeops_collapsefactors}}, +\code{\link{mlr_pipeops_colroles}}, +\code{\link{mlr_pipeops_copy}}, +\code{\link{mlr_pipeops_datefeatures}}, +\code{\link{mlr_pipeops_encode}}, +\code{\link{mlr_pipeops_encodeimpact}}, +\code{\link{mlr_pipeops_encodelmer}}, +\code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filter}}, +\code{\link{mlr_pipeops_fixfactors}}, +\code{\link{mlr_pipeops_histbin}}, +\code{\link{mlr_pipeops_ica}}, +\code{\link{mlr_pipeops_imputeconstant}}, +\code{\link{mlr_pipeops_imputehist}}, +\code{\link{mlr_pipeops_imputelearner}}, +\code{\link{mlr_pipeops_imputemean}}, +\code{\link{mlr_pipeops_imputemedian}}, +\code{\link{mlr_pipeops_imputemode}}, +\code{\link{mlr_pipeops_imputeoor}}, +\code{\link{mlr_pipeops_imputesample}}, +\code{\link{mlr_pipeops_kernelpca}}, +\code{\link{mlr_pipeops_learner}}, +\code{\link{mlr_pipeops_missind}}, +\code{\link{mlr_pipeops_modelmatrix}}, +\code{\link{mlr_pipeops_multiplicityexply}}, +\code{\link{mlr_pipeops_multiplicityimply}}, +\code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nmf}}, +\code{\link{mlr_pipeops_nop}}, +\code{\link{mlr_pipeops_ovrsplit}}, +\code{\link{mlr_pipeops_ovrunite}}, +\code{\link{mlr_pipeops_pca}}, +\code{\link{mlr_pipeops_proxy}}, +\code{\link{mlr_pipeops_quantilebin}}, +\code{\link{mlr_pipeops_randomprojection}}, +\code{\link{mlr_pipeops_randomresponse}}, +\code{\link{mlr_pipeops_regravg}}, +\code{\link{mlr_pipeops_removeconstants}}, +\code{\link{mlr_pipeops_renamecolumns}}, +\code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_scale}}, +\code{\link{mlr_pipeops_scalemaxabs}}, +\code{\link{mlr_pipeops_scalerange}}, +\code{\link{mlr_pipeops_select}}, +\code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_spatialsign}}, +\code{\link{mlr_pipeops_subsample}}, +\code{\link{mlr_pipeops_targetinvert}}, +\code{\link{mlr_pipeops_targetmutate}}, +\code{\link{mlr_pipeops_targettrafoscalerange}}, +\code{\link{mlr_pipeops_textvectorizer}}, +\code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_unbranch}}, +\code{\link{mlr_pipeops_updatetarget}}, +\code{\link{mlr_pipeops_vtreat}}, +\code{\link{mlr_pipeops_yeojohnson}} +} +\concept{PipeOps} From 7767a3d0a7f6c5ac1564cf51524aa777bb063262 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 28 Jul 2024 17:16:08 +0200 Subject: [PATCH 11/15] DOC and meta data --- DESCRIPTION | 1 + NAMESPACE | 1 + R/PipeOpRowApply.R | 2 +- man/PipeOp.Rd | 1 + man/PipeOpEnsemble.Rd | 1 + man/PipeOpImpute.Rd | 1 + man/PipeOpTargetTrafo.Rd | 1 + man/PipeOpTaskPreproc.Rd | 1 + man/PipeOpTaskPreprocSimple.Rd | 1 + man/mlr_pipeops.Rd | 1 + man/mlr_pipeops_boxcox.Rd | 1 + man/mlr_pipeops_branch.Rd | 1 + man/mlr_pipeops_chunk.Rd | 1 + man/mlr_pipeops_classbalancing.Rd | 1 + man/mlr_pipeops_classifavg.Rd | 1 + man/mlr_pipeops_classweights.Rd | 1 + man/mlr_pipeops_colapply.Rd | 1 + man/mlr_pipeops_collapsefactors.Rd | 1 + man/mlr_pipeops_colroles.Rd | 1 + man/mlr_pipeops_copy.Rd | 1 + man/mlr_pipeops_datefeatures.Rd | 1 + man/mlr_pipeops_encode.Rd | 1 + man/mlr_pipeops_encodeimpact.Rd | 1 + man/mlr_pipeops_encodelmer.Rd | 1 + man/mlr_pipeops_featureunion.Rd | 1 + man/mlr_pipeops_filter.Rd | 1 + man/mlr_pipeops_fixfactors.Rd | 1 + man/mlr_pipeops_histbin.Rd | 1 + man/mlr_pipeops_ica.Rd | 1 + man/mlr_pipeops_imputeconstant.Rd | 1 + man/mlr_pipeops_imputehist.Rd | 1 + man/mlr_pipeops_imputelearner.Rd | 1 + man/mlr_pipeops_imputemean.Rd | 1 + man/mlr_pipeops_imputemedian.Rd | 1 + man/mlr_pipeops_imputemode.Rd | 1 + man/mlr_pipeops_imputeoor.Rd | 1 + man/mlr_pipeops_imputesample.Rd | 1 + man/mlr_pipeops_kernelpca.Rd | 1 + man/mlr_pipeops_learner.Rd | 1 + man/mlr_pipeops_missind.Rd | 1 + man/mlr_pipeops_modelmatrix.Rd | 1 + man/mlr_pipeops_multiplicityexply.Rd | 1 + man/mlr_pipeops_multiplicityimply.Rd | 1 + man/mlr_pipeops_mutate.Rd | 1 + man/mlr_pipeops_nmf.Rd | 3 ++- man/mlr_pipeops_nop.Rd | 1 + man/mlr_pipeops_ovrsplit.Rd | 1 + man/mlr_pipeops_ovrunite.Rd | 1 + man/mlr_pipeops_pca.Rd | 1 + man/mlr_pipeops_proxy.Rd | 1 + man/mlr_pipeops_quantilebin.Rd | 1 + man/mlr_pipeops_randomprojection.Rd | 1 + man/mlr_pipeops_randomresponse.Rd | 1 + man/mlr_pipeops_regravg.Rd | 1 + man/mlr_pipeops_removeconstants.Rd | 1 + man/mlr_pipeops_renamecolumns.Rd | 1 + man/mlr_pipeops_replicate.Rd | 1 + man/mlr_pipeops_scale.Rd | 1 + man/mlr_pipeops_scalemaxabs.Rd | 1 + man/mlr_pipeops_scalerange.Rd | 1 + man/mlr_pipeops_select.Rd | 1 + man/mlr_pipeops_smote.Rd | 1 + man/mlr_pipeops_spatialsign.Rd | 1 + man/mlr_pipeops_subsample.Rd | 1 + man/mlr_pipeops_targetinvert.Rd | 1 + man/mlr_pipeops_targetmutate.Rd | 1 + man/mlr_pipeops_targettrafoscalerange.Rd | 1 + man/mlr_pipeops_textvectorizer.Rd | 1 + man/mlr_pipeops_threshold.Rd | 1 + man/mlr_pipeops_tunethreshold.Rd | 1 + man/mlr_pipeops_unbranch.Rd | 1 + man/mlr_pipeops_updatetarget.Rd | 1 + man/mlr_pipeops_vtreat.Rd | 1 + man/mlr_pipeops_yeojohnson.Rd | 1 + 74 files changed, 75 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 29f8348b2..a0790bc19 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -153,6 +153,7 @@ Collate: 'PipeOpRegrAvg.R' 'PipeOpRemoveConstants.R' 'PipeOpRenameColumns.R' + 'PipeOpRowApply.R' 'PipeOpScale.R' 'PipeOpScaleMaxAbs.R' 'PipeOpScaleRange.R' diff --git a/NAMESPACE b/NAMESPACE index d69d21c09..6d8c22381 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -92,6 +92,7 @@ export(PipeOpRegrAvg) export(PipeOpRemoveConstants) export(PipeOpRenameColumns) export(PipeOpReplicate) +export(PipeOpRowApply) export(PipeOpScale) export(PipeOpScaleMaxAbs) export(PipeOpScaleRange) diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index 9e34d5e62..35809865f 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -86,7 +86,7 @@ PipeOpRowApply = R6Class("PipeOpRowApply", # Handle data table with zero rows by adding filler content to emulate column creation later if (nrow(dt) == 0) { - dt = dt[NA_integer_] # Adds emtpy row + dt = dt[NA_integer_] # Adds empty row was_empty = TRUE } else { was_empty = FALSE diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index 82d829e42..3458719fd 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -320,6 +320,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd index 61ac51bb9..46bc5918b 100644 --- a/man/PipeOpEnsemble.Rd +++ b/man/PipeOpEnsemble.Rd @@ -152,6 +152,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd index e52256e79..caa76efbb 100644 --- a/man/PipeOpImpute.Rd +++ b/man/PipeOpImpute.Rd @@ -182,6 +182,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd index 8d811ef60..8a534ec18 100644 --- a/man/PipeOpTargetTrafo.Rd +++ b/man/PipeOpTargetTrafo.Rd @@ -193,6 +193,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd index 817173680..69f92477c 100644 --- a/man/PipeOpTaskPreproc.Rd +++ b/man/PipeOpTaskPreproc.Rd @@ -248,6 +248,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd index 69ec70f72..d836e75a5 100644 --- a/man/PipeOpTaskPreprocSimple.Rd +++ b/man/PipeOpTaskPreprocSimple.Rd @@ -185,6 +185,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops.Rd b/man/mlr_pipeops.Rd index bba536267..e2a7d1e1a 100644 --- a/man/mlr_pipeops.Rd +++ b/man/mlr_pipeops.Rd @@ -122,6 +122,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_boxcox.Rd b/man/mlr_pipeops_boxcox.Rd index 0d514ce8c..064a069ca 100644 --- a/man/mlr_pipeops_boxcox.Rd +++ b/man/mlr_pipeops_boxcox.Rd @@ -136,6 +136,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_branch.Rd b/man/mlr_pipeops_branch.Rd index e9b855e0a..a83b502a1 100644 --- a/man/mlr_pipeops_branch.Rd +++ b/man/mlr_pipeops_branch.Rd @@ -154,6 +154,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_chunk.Rd b/man/mlr_pipeops_chunk.Rd index 7603b5b0f..4b98bbd2a 100644 --- a/man/mlr_pipeops_chunk.Rd +++ b/man/mlr_pipeops_chunk.Rd @@ -133,6 +133,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_classbalancing.Rd b/man/mlr_pipeops_classbalancing.Rd index c734631a9..19dcd067e 100644 --- a/man/mlr_pipeops_classbalancing.Rd +++ b/man/mlr_pipeops_classbalancing.Rd @@ -174,6 +174,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_classifavg.Rd b/man/mlr_pipeops_classifavg.Rd index 381046572..160ba73ab 100644 --- a/man/mlr_pipeops_classifavg.Rd +++ b/man/mlr_pipeops_classifavg.Rd @@ -150,6 +150,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_classweights.Rd b/man/mlr_pipeops_classweights.Rd index ea3eef216..a4666b7e4 100644 --- a/man/mlr_pipeops_classweights.Rd +++ b/man/mlr_pipeops_classweights.Rd @@ -142,6 +142,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_colapply.Rd b/man/mlr_pipeops_colapply.Rd index e2e8bbe54..bf8065f8e 100644 --- a/man/mlr_pipeops_colapply.Rd +++ b/man/mlr_pipeops_colapply.Rd @@ -163,6 +163,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_collapsefactors.Rd b/man/mlr_pipeops_collapsefactors.Rd index 4dc6dc619..91798c99d 100644 --- a/man/mlr_pipeops_collapsefactors.Rd +++ b/man/mlr_pipeops_collapsefactors.Rd @@ -130,6 +130,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index 73a5ee723..a0e742faa 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -122,6 +122,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_copy.Rd b/man/mlr_pipeops_copy.Rd index c09aff0cf..a4160342d 100644 --- a/man/mlr_pipeops_copy.Rd +++ b/man/mlr_pipeops_copy.Rd @@ -152,6 +152,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_datefeatures.Rd b/man/mlr_pipeops_datefeatures.Rd index eb881ec59..5028544b2 100644 --- a/man/mlr_pipeops_datefeatures.Rd +++ b/man/mlr_pipeops_datefeatures.Rd @@ -169,6 +169,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_encode.Rd b/man/mlr_pipeops_encode.Rd index 79f2e3a8c..71a13f26f 100644 --- a/man/mlr_pipeops_encode.Rd +++ b/man/mlr_pipeops_encode.Rd @@ -165,6 +165,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_encodeimpact.Rd b/man/mlr_pipeops_encodeimpact.Rd index 8033735f0..7a435e1f2 100644 --- a/man/mlr_pipeops_encodeimpact.Rd +++ b/man/mlr_pipeops_encodeimpact.Rd @@ -147,6 +147,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_encodelmer.Rd b/man/mlr_pipeops_encodelmer.Rd index bfd1285ec..ad391725f 100644 --- a/man/mlr_pipeops_encodelmer.Rd +++ b/man/mlr_pipeops_encodelmer.Rd @@ -162,6 +162,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_featureunion.Rd b/man/mlr_pipeops_featureunion.Rd index e0dbf21b6..a509b87eb 100644 --- a/man/mlr_pipeops_featureunion.Rd +++ b/man/mlr_pipeops_featureunion.Rd @@ -167,6 +167,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_filter.Rd b/man/mlr_pipeops_filter.Rd index 3d5d2fc53..a37d1328a 100644 --- a/man/mlr_pipeops_filter.Rd +++ b/man/mlr_pipeops_filter.Rd @@ -198,6 +198,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_fixfactors.Rd b/man/mlr_pipeops_fixfactors.Rd index 5fd00abc2..6a4ac569c 100644 --- a/man/mlr_pipeops_fixfactors.Rd +++ b/man/mlr_pipeops_fixfactors.Rd @@ -122,6 +122,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_histbin.Rd b/man/mlr_pipeops_histbin.Rd index 74d6c5e2f..ce133cd8b 100644 --- a/man/mlr_pipeops_histbin.Rd +++ b/man/mlr_pipeops_histbin.Rd @@ -134,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_ica.Rd b/man/mlr_pipeops_ica.Rd index 1e6473928..d6e93d163 100644 --- a/man/mlr_pipeops_ica.Rd +++ b/man/mlr_pipeops_ica.Rd @@ -162,6 +162,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_imputeconstant.Rd b/man/mlr_pipeops_imputeconstant.Rd index 0c35c4a9a..a6ab5d027 100644 --- a/man/mlr_pipeops_imputeconstant.Rd +++ b/man/mlr_pipeops_imputeconstant.Rd @@ -136,6 +136,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_imputehist.Rd b/man/mlr_pipeops_imputehist.Rd index 0fb6d8f1f..c5abb9d72 100644 --- a/man/mlr_pipeops_imputehist.Rd +++ b/man/mlr_pipeops_imputehist.Rd @@ -127,6 +127,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_imputelearner.Rd b/man/mlr_pipeops_imputelearner.Rd index a2f9ea073..4819be20f 100644 --- a/man/mlr_pipeops_imputelearner.Rd +++ b/man/mlr_pipeops_imputelearner.Rd @@ -173,6 +173,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_imputemean.Rd b/man/mlr_pipeops_imputemean.Rd index bd8d788a5..64dd29a38 100644 --- a/man/mlr_pipeops_imputemean.Rd +++ b/man/mlr_pipeops_imputemean.Rd @@ -121,6 +121,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_imputemedian.Rd b/man/mlr_pipeops_imputemedian.Rd index 00145e29d..1f4286c64 100644 --- a/man/mlr_pipeops_imputemedian.Rd +++ b/man/mlr_pipeops_imputemedian.Rd @@ -121,6 +121,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_imputemode.Rd b/man/mlr_pipeops_imputemode.Rd index 613970b73..9cbcdba06 100644 --- a/man/mlr_pipeops_imputemode.Rd +++ b/man/mlr_pipeops_imputemode.Rd @@ -128,6 +128,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_imputeoor.Rd b/man/mlr_pipeops_imputeoor.Rd index c5766c7e6..221912cfb 100644 --- a/man/mlr_pipeops_imputeoor.Rd +++ b/man/mlr_pipeops_imputeoor.Rd @@ -150,6 +150,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_imputesample.Rd b/man/mlr_pipeops_imputesample.Rd index 2944213ce..d9f4d8f75 100644 --- a/man/mlr_pipeops_imputesample.Rd +++ b/man/mlr_pipeops_imputesample.Rd @@ -123,6 +123,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_kernelpca.Rd b/man/mlr_pipeops_kernelpca.Rd index e2d92e746..a9bddd763 100644 --- a/man/mlr_pipeops_kernelpca.Rd +++ b/man/mlr_pipeops_kernelpca.Rd @@ -137,6 +137,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_learner.Rd b/man/mlr_pipeops_learner.Rd index 023c5ca9c..43c259806 100644 --- a/man/mlr_pipeops_learner.Rd +++ b/man/mlr_pipeops_learner.Rd @@ -168,6 +168,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_missind.Rd b/man/mlr_pipeops_missind.Rd index d1ac309e3..b9f8d51da 100644 --- a/man/mlr_pipeops_missind.Rd +++ b/man/mlr_pipeops_missind.Rd @@ -151,6 +151,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_modelmatrix.Rd b/man/mlr_pipeops_modelmatrix.Rd index d27063f12..1e1b00c2e 100644 --- a/man/mlr_pipeops_modelmatrix.Rd +++ b/man/mlr_pipeops_modelmatrix.Rd @@ -127,6 +127,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_multiplicityexply.Rd b/man/mlr_pipeops_multiplicityexply.Rd index 01531c672..e4c67c232 100644 --- a/man/mlr_pipeops_multiplicityexply.Rd +++ b/man/mlr_pipeops_multiplicityexply.Rd @@ -133,6 +133,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_multiplicityimply.Rd b/man/mlr_pipeops_multiplicityimply.Rd index b5a3400ab..c07f85bab 100644 --- a/man/mlr_pipeops_multiplicityimply.Rd +++ b/man/mlr_pipeops_multiplicityimply.Rd @@ -139,6 +139,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_mutate.Rd b/man/mlr_pipeops_mutate.Rd index cff63d4b1..8da58522f 100644 --- a/man/mlr_pipeops_mutate.Rd +++ b/man/mlr_pipeops_mutate.Rd @@ -144,6 +144,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index 148dfbcfd..7c8c351df 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -96,7 +96,7 @@ See \code{\link[NMF:nmf]{nmf()}}. \section{Internals}{ -Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis-coef-methods]{basis()}}, \code{\link[NMF:basis-coef-methods]{coef()}} and +Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis]{basis()}}, \code{\link[NMF:coef]{coef()}} and \code{\link[MASS:ginv]{ginv()}}. } @@ -179,6 +179,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_nop.Rd b/man/mlr_pipeops_nop.Rd index eabc9e48f..fd6fd2ea4 100644 --- a/man/mlr_pipeops_nop.Rd +++ b/man/mlr_pipeops_nop.Rd @@ -129,6 +129,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd index e0718678b..76c661fde 100644 --- a/man/mlr_pipeops_ovrsplit.Rd +++ b/man/mlr_pipeops_ovrsplit.Rd @@ -146,6 +146,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd index 83f3c85c2..f01cba41e 100644 --- a/man/mlr_pipeops_ovrunite.Rd +++ b/man/mlr_pipeops_ovrunite.Rd @@ -141,6 +141,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_pca.Rd b/man/mlr_pipeops_pca.Rd index ca5d14d59..18f5eb086 100644 --- a/man/mlr_pipeops_pca.Rd +++ b/man/mlr_pipeops_pca.Rd @@ -138,6 +138,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd index 1e6e8f9c0..a5ef51112 100644 --- a/man/mlr_pipeops_proxy.Rd +++ b/man/mlr_pipeops_proxy.Rd @@ -152,6 +152,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_quantilebin.Rd b/man/mlr_pipeops_quantilebin.Rd index 8b416ee52..6e5a85a24 100644 --- a/man/mlr_pipeops_quantilebin.Rd +++ b/man/mlr_pipeops_quantilebin.Rd @@ -126,6 +126,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_randomprojection.Rd b/man/mlr_pipeops_randomprojection.Rd index e41d6ea42..2323caf66 100644 --- a/man/mlr_pipeops_randomprojection.Rd +++ b/man/mlr_pipeops_randomprojection.Rd @@ -138,6 +138,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_randomresponse.Rd b/man/mlr_pipeops_randomresponse.Rd index 2f813a326..c497d3ad2 100644 --- a/man/mlr_pipeops_randomresponse.Rd +++ b/man/mlr_pipeops_randomresponse.Rd @@ -155,6 +155,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd index 4b1603441..a97bde700 100644 --- a/man/mlr_pipeops_regravg.Rd +++ b/man/mlr_pipeops_regravg.Rd @@ -141,6 +141,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_removeconstants.Rd b/man/mlr_pipeops_removeconstants.Rd index 4fe961f7c..ef3d43d75 100644 --- a/man/mlr_pipeops_removeconstants.Rd +++ b/man/mlr_pipeops_removeconstants.Rd @@ -131,6 +131,7 @@ Other PipeOps: \code{\link{mlr_pipeops_regravg}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_renamecolumns.Rd b/man/mlr_pipeops_renamecolumns.Rd index 768211b84..20947f1be 100644 --- a/man/mlr_pipeops_renamecolumns.Rd +++ b/man/mlr_pipeops_renamecolumns.Rd @@ -130,6 +130,7 @@ Other PipeOps: \code{\link{mlr_pipeops_regravg}}, \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_replicate.Rd b/man/mlr_pipeops_replicate.Rd index 7735a2586..71949f16c 100644 --- a/man/mlr_pipeops_replicate.Rd +++ b/man/mlr_pipeops_replicate.Rd @@ -123,6 +123,7 @@ Other PipeOps: \code{\link{mlr_pipeops_regravg}}, \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_scale.Rd b/man/mlr_pipeops_scale.Rd index 33d4e027e..9c8a3a316 100644 --- a/man/mlr_pipeops_scale.Rd +++ b/man/mlr_pipeops_scale.Rd @@ -146,6 +146,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, \code{\link{mlr_pipeops_select}}, diff --git a/man/mlr_pipeops_scalemaxabs.Rd b/man/mlr_pipeops_scalemaxabs.Rd index 279a2c7c1..46c5c4c45 100644 --- a/man/mlr_pipeops_scalemaxabs.Rd +++ b/man/mlr_pipeops_scalemaxabs.Rd @@ -121,6 +121,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalerange}}, \code{\link{mlr_pipeops_select}}, diff --git a/man/mlr_pipeops_scalerange.Rd b/man/mlr_pipeops_scalerange.Rd index 707ca661c..678e54b0d 100644 --- a/man/mlr_pipeops_scalerange.Rd +++ b/man/mlr_pipeops_scalerange.Rd @@ -126,6 +126,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_select}}, diff --git a/man/mlr_pipeops_select.Rd b/man/mlr_pipeops_select.Rd index c3d8ec0f9..353e280b0 100644 --- a/man/mlr_pipeops_select.Rd +++ b/man/mlr_pipeops_select.Rd @@ -142,6 +142,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd index b92867d6e..ccbd9c6cd 100644 --- a/man/mlr_pipeops_smote.Rd +++ b/man/mlr_pipeops_smote.Rd @@ -145,6 +145,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_spatialsign.Rd b/man/mlr_pipeops_spatialsign.Rd index 4995632ca..9fdb650d6 100644 --- a/man/mlr_pipeops_spatialsign.Rd +++ b/man/mlr_pipeops_spatialsign.Rd @@ -121,6 +121,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_subsample.Rd b/man/mlr_pipeops_subsample.Rd index 5a0a3c9fc..c89142226 100644 --- a/man/mlr_pipeops_subsample.Rd +++ b/man/mlr_pipeops_subsample.Rd @@ -136,6 +136,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_targetinvert.Rd b/man/mlr_pipeops_targetinvert.Rd index a63ea2feb..fe5073375 100644 --- a/man/mlr_pipeops_targetinvert.Rd +++ b/man/mlr_pipeops_targetinvert.Rd @@ -121,6 +121,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd index 5193c2db9..dd7982fdf 100644 --- a/man/mlr_pipeops_targetmutate.Rd +++ b/man/mlr_pipeops_targetmutate.Rd @@ -169,6 +169,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd index 8441b3f1e..e651099eb 100644 --- a/man/mlr_pipeops_targettrafoscalerange.Rd +++ b/man/mlr_pipeops_targettrafoscalerange.Rd @@ -135,6 +135,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd index 6212e6ad6..57ab20d9a 100644 --- a/man/mlr_pipeops_textvectorizer.Rd +++ b/man/mlr_pipeops_textvectorizer.Rd @@ -235,6 +235,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd index 98c3039c0..addaefa55 100644 --- a/man/mlr_pipeops_threshold.Rd +++ b/man/mlr_pipeops_threshold.Rd @@ -128,6 +128,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd index 7ce2bc4ab..30684bfb6 100644 --- a/man/mlr_pipeops_tunethreshold.Rd +++ b/man/mlr_pipeops_tunethreshold.Rd @@ -153,6 +153,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_unbranch.Rd b/man/mlr_pipeops_unbranch.Rd index a6986b956..6d17dfeb3 100644 --- a/man/mlr_pipeops_unbranch.Rd +++ b/man/mlr_pipeops_unbranch.Rd @@ -133,6 +133,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd index 9e1ae3b06..2774382f7 100644 --- a/man/mlr_pipeops_updatetarget.Rd +++ b/man/mlr_pipeops_updatetarget.Rd @@ -148,6 +148,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd index 67f23519d..28d5f205a 100644 --- a/man/mlr_pipeops_vtreat.Rd +++ b/man/mlr_pipeops_vtreat.Rd @@ -201,6 +201,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd index 82284c0d1..89123d332 100644 --- a/man/mlr_pipeops_yeojohnson.Rd +++ b/man/mlr_pipeops_yeojohnson.Rd @@ -138,6 +138,7 @@ Other PipeOps: \code{\link{mlr_pipeops_removeconstants}}, \code{\link{mlr_pipeops_renamecolumns}}, \code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, \code{\link{mlr_pipeops_scale}}, \code{\link{mlr_pipeops_scalemaxabs}}, \code{\link{mlr_pipeops_scalerange}}, From 6259baec2998cfced8e5fa731d53ccba1b0bf2a3 Mon Sep 17 00:00:00 2001 From: "Keno M." <118814423+advieser@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:11:39 +0200 Subject: [PATCH 12/15] Apply suggestions from code review Co-authored-by: mb706 --- R/PipeOpRowApply.R | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index 35809865f..4d6c164be 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -30,13 +30,10 @@ #' The parameters are the parameters inherited from [`PipeOpTaskPreprocSimple`], as well as: #' * `applicator` :: `function`\cr #' Function to apply to each row in the affected columns of the task. -#' The return value should be a `vector` of the same length as the input, i.e., the function vectorizes over the input. -#' Alternatively, the return value can be a `matrix`, `data.frame`, or [`data.table`][data.table::data.table]. -#' In this case, the length of the input must match the number of returned rows. -#' Use [`Vectorize`][base::Vectorize] to create a vectorizing function from any function that ordinarily only takes one element input. -#' Default is [identity()][base::identity] +#' The return value should be a vector of the same length for every input. +#' Initialized as [`identity()`][base::identity] #' * `col_prefix` :: `character(1)`\cr -#' If specified, prefix to be prepended to the column names of affected columns, separated by a dot (`.`). Default is `character(1)`. +#' If specified, prefix to be prepended to the column names of affected columns, separated by a dot (`.`). Default is `""`. #' #' @section Internals: #' Calls [`apply`] on the data, using the value of `applicator` as `FUN` and `simplify = TRUE`, then coerces the output via @@ -93,16 +90,16 @@ PipeOpRowApply = R6Class("PipeOpRowApply", } res = apply(dt, 1, applicator) - if (!(test_atomic_vector(res) | test_matrix(res))) { + if (!(test_atomic_vector(res) || test_matrix(res))) { stop("Apply with FUN = applicator and simplified = TRUE should generate either atomic vector or matrix.") } # Convert result to a matrix for consistent column name handling if (test_atomic_vector(res)) { - res = matrix(res, nrow = 1) # Ensure matrix has one row for correct transposition - } + res = matrix(res, ncol = 1) # Ensure matrix has one row for correct transposition + } else { # Transpose the matrix for correct Task dimensions res = t(res) - + } # Assign column names if they are missing if (is.null(colnames(res))) { if (ncol(res) == ncol(dt)) { @@ -113,7 +110,7 @@ PipeOpRowApply = R6Class("PipeOpRowApply", } # Prepend column prefix if specified if (col_prefix != "") { - colnames(res) <- paste(col_prefix, colnames(res), sep = ".") + colnames(res) = paste(col_prefix, colnames(res), sep = ".") } # Remove filler content if the original data.table had zero rows From 5568de5687a067ca0bde7d182ce6e737052166b9 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Tue, 30 Jul 2024 16:25:10 +0200 Subject: [PATCH 13/15] fixed code review suggestsions, added handling of zero column output --- R/PipeOpRowApply.R | 26 ++++++----- man/mlr_pipeops_rowapply.Rd | 9 ++-- tests/testthat/test_pipeop_rowapply.R | 63 +++++++++++++++++++++++++-- 3 files changed, 77 insertions(+), 21 deletions(-) diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index 4d6c164be..087280490 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -31,7 +31,7 @@ #' * `applicator` :: `function`\cr #' Function to apply to each row in the affected columns of the task. #' The return value should be a vector of the same length for every input. -#' Initialized as [`identity()`][base::identity] +#' Initialized as [`identity()`][base::identity]. #' * `col_prefix` :: `character(1)`\cr #' If specified, prefix to be prepended to the column names of affected columns, separated by a dot (`.`). Default is `""`. #' @@ -77,28 +77,31 @@ PipeOpRowApply = R6Class("PipeOpRowApply", }, .transform_dt = function(dt, levels) { - applicator = self$param_set$values$applicator - col_prefix = self$param_set$values$col_prefix + pv = self$param_set$values cnames = colnames(dt) # Handle data table with zero rows by adding filler content to emulate column creation later - if (nrow(dt) == 0) { + if (nrow(dt) == 0L) { dt = dt[NA_integer_] # Adds empty row was_empty = TRUE } else { was_empty = FALSE } - res = apply(dt, 1, applicator) + res = apply(dt, 1, pv$applicator) + + if (!length(res)) { + return(matrix(numeric(0), nrow = nrow(dt))) + } if (!(test_atomic_vector(res) || test_matrix(res))) { stop("Apply with FUN = applicator and simplified = TRUE should generate either atomic vector or matrix.") } # Convert result to a matrix for consistent column name handling if (test_atomic_vector(res)) { - res = matrix(res, ncol = 1) # Ensure matrix has one row for correct transposition + res = matrix(res, ncol = 1) } else { - # Transpose the matrix for correct Task dimensions - res = t(res) + # Transpose the matrix for correct Task dimensions + res = t(res) } # Assign column names if they are missing if (is.null(colnames(res))) { @@ -109,12 +112,11 @@ PipeOpRowApply = R6Class("PipeOpRowApply", } } # Prepend column prefix if specified - if (col_prefix != "") { - colnames(res) = paste(col_prefix, colnames(res), sep = ".") + if (pv$col_prefix != "") { + colnames(res) = paste(pv$col_prefix, colnames(res), sep = ".") } - # Remove filler content if the original data.table had zero rows - if (was_empty == TRUE) { + if (was_empty) { res = res[0L, ] } diff --git a/man/mlr_pipeops_rowapply.Rd b/man/mlr_pipeops_rowapply.Rd index ca1f89e5e..85e0ac30e 100644 --- a/man/mlr_pipeops_rowapply.Rd +++ b/man/mlr_pipeops_rowapply.Rd @@ -43,13 +43,10 @@ The parameters are the parameters inherited from \code{\link{PipeOpTaskPreprocSi \itemize{ \item \code{applicator} :: \code{function}\cr Function to apply to each row in the affected columns of the task. -The return value should be a \code{vector} of the same length as the input, i.e., the function vectorizes over the input. -Alternatively, the return value can be a \code{matrix}, \code{data.frame}, or \code{\link[data.table:data.table]{data.table}}. -In this case, the length of the input must match the number of returned rows. -Use \code{\link[base:Vectorize]{Vectorize}} to create a vectorizing function from any function that ordinarily only takes one element input. -Default is \link[base:identity]{identity()} +The return value should be a vector of the same length for every input. +Initialized as \code{\link[base:identity]{identity()}}. \item \code{col_prefix} :: \code{character(1)}\cr -If specified, prefix to be prepended to the column names of affected columns, separated by a dot (\code{.}). Default is \code{character(1)}. +If specified, prefix to be prepended to the column names of affected columns, separated by a dot (\code{.}). Default is \code{""}. } } diff --git a/tests/testthat/test_pipeop_rowapply.R b/tests/testthat/test_pipeop_rowapply.R index 50cc9f718..cb6430b57 100644 --- a/tests/testthat/test_pipeop_rowapply.R +++ b/tests/testthat/test_pipeop_rowapply.R @@ -71,6 +71,19 @@ test_that("PipeOpRowApply - transform on task with only numeric features", { as.data.table(t(matrix(apply(iris, 1, applicator), nrow = 1))) ) + # applicator generates empty output + applicator = function(x) numeric(0) + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(), + task$data(cols = task$target_names) + ) + expect_equal( + op$predict(list(task))[[1]]$data(), + task$data(cols = task$target_names) + ) + # error if apply generates anything but a matrix or vector (e.g. non-simplifiable list) applicator = function(x) if(mean(x) < 3) c(x[[1]], x[[2]]) else x[[1]] op$param_set$values$applicator = applicator @@ -92,9 +105,8 @@ test_that("PipeOpRowApply - transform on task with only numeric features", { test_that("PipeOpRowApply - transform works on task with only integer features", { op = PipeOpRowApply$new() - task = mlr_tasks$get("german_credit") - cnames = c("age", "amount", "duration") - task$select(cnames) + task = mlr_tasks$get("german_credit")$select(c("age", "amount", "duration")) + cnames = task$feature_names german_credit = task$data(cols = cnames) # applicator generates matrix with names @@ -151,6 +163,19 @@ test_that("PipeOpRowApply - transform works on task with only integer features", as.data.table(t(matrix(apply(german_credit, 1, applicator), nrow = 1))) ) + # applicator generates empty output + applicator = function(x) numeric(0) + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(), + task$data(cols = task$target_names) + ) + expect_equal( + op$predict(list(task))[[1]]$data(), + task$data(cols = task$target_names) + ) + # error if apply generates anything but a matrix or vector (e.g. non-simplifiable list) applicator = function(x) if(mean(x) < 1000) c(x[[1]], x[[2]]) else x[[1]] op$param_set$values$applicator = applicator @@ -230,6 +255,19 @@ test_that("PipeOpRowApply - transform works on task with both numeric and intege as.data.table(t(matrix(apply(wine, 1, applicator), nrow = 1))) ) + # applicator generates empty output + applicator = function(x) numeric(0) + op$param_set$values$applicator = applicator + + expect_equal( + op$train(list(task))[[1]]$data(), + task$data(cols = task$target_names) + ) + expect_equal( + op$predict(list(task))[[1]]$data(), + task$data(cols = task$target_names) + ) + # error if apply generates anything but a matrix or vector (e.g. non-simplifiable list) applicator = function(x) if(mean(x) < 50) c(x[[1]], x[[2]]) else x[[1]] op$param_set$values$applicator = applicator @@ -443,3 +481,22 @@ test_that("PipeOpRowApply - transform works for only empty predict task (no rows expect_true(all(predict_out$feature_types$type == "integer")) }) + +test_that("PipeOpRowApply - transform works on task with no numeric or integer columns", { + + op = PipeOpRowApply$new() + task = mlr_tasks$get("penguins")$select(c("island", "sex")) + cnames = task$feature_names + + op$param_set$values$applicator = as.integer + + expect_equal( + op$train(list(task))[[1]], + task + ) + expect_equal( + op$predict(list(task))[[1]], + task + ) + +}) From 886af9c7de55778473f306c3fe22c441cf6db4a7 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Tue, 30 Jul 2024 20:40:12 +0200 Subject: [PATCH 14/15] fixed test --- tests/testthat/test_pipeop_rowapply.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_pipeop_rowapply.R b/tests/testthat/test_pipeop_rowapply.R index cb6430b57..8849da12b 100644 --- a/tests/testthat/test_pipeop_rowapply.R +++ b/tests/testthat/test_pipeop_rowapply.R @@ -427,12 +427,12 @@ test_that("PipeOpRowApply - transform works on empty task (no rows)", { }) -test_that("PipeOpRowApply - transform works for only empty predict task (no rows)", { +test_that("PipeOpRowApply - transform works for empty predict task (no rows)", { op = PipeOpRowApply$new() task_train = mlr_tasks$get("wine") - task_predict = task$filter(0) - cnames = task$feature_names + task_predict = task_train$filter(0) + cnames = task_train$feature_names # applicator generates matrix with names applicator = function(x) x^2 From cbc2db6435e8b079a2b5352254eedcfd9d2934b4 Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 6 Aug 2024 13:32:43 +0200 Subject: [PATCH 15/15] minor doc fixes --- R/PipeOpRowApply.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/PipeOpRowApply.R b/R/PipeOpRowApply.R index 087280490..44628eaee 100644 --- a/R/PipeOpRowApply.R +++ b/R/PipeOpRowApply.R @@ -33,11 +33,10 @@ #' The return value should be a vector of the same length for every input. #' Initialized as [`identity()`][base::identity]. #' * `col_prefix` :: `character(1)`\cr -#' If specified, prefix to be prepended to the column names of affected columns, separated by a dot (`.`). Default is `""`. +#' If specified, prefix to be prepended to the column names of affected columns, separated by a dot (`.`). Initialized as `""`. #' #' @section Internals: -#' Calls [`apply`] on the data, using the value of `applicator` as `FUN` and `simplify = TRUE`, then coerces the output via -#' [`as.data.table()`][data.table::as.data.table]. +#' Calls [`apply`] on the data, using the value of `applicator` as `FUN`. #' #' @section Fields: #' Only fields inherited from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`].