From 8946fcda7fa3f8ec4789a7e97156bf07ac2b1de2 Mon Sep 17 00:00:00 2001
From: mb706
Date: Fri, 16 Jun 2023 01:47:07 +0200
Subject: [PATCH 01/46] add `replace` argument to ppl("bagging")

---
 R/pipeline_bagging.R | 15 ++++++++++++---
 man/mlr_graphs_bagging.Rd | 20 ++++++++++++++++++--
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/R/pipeline_bagging.R b/R/pipeline_bagging.R
index afcf0c7f9..2ffd03d7a 100644
--- a/R/pipeline_bagging.R
+++ b/R/pipeline_bagging.R
@@ -28,6 +28,9 @@
 #' predictions respectively.
 #' If `NULL` (default), no averager is added to the end of the graph.
 #' Note that setting `collect_multipliciy = TRUE` during construction of the averager is required.
+#' @param replace `logical(1)` \cr
+#' Whether to sample with replacement.
+#' Default `FALSE`.
 #' @return [`Graph`]
 #' @export
 #' @examples
@@ -36,9 +39,15 @@
 #' lrn_po = po("learner", lrn("regr.rpart"))
 #' task = mlr_tasks$get("boston_housing")
 #' gr = pipeline_bagging(lrn_po, 3, averager = po("regravg", collect_multiplicity = TRUE))
-#' resample(task, GraphLearner$new(gr), rsmp("holdout"))
+#' resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate()
+#'
+#' # The original bagging method uses bootstrapping, i.e. sampling with replacement.
+#' # This may give better performance but is also slower.
+#' gr = ppl("bagging", lrn_po, frac = 1, replace = TRUE,
+#'   averager = po("regravg", collect_multiplicity = TRUE))
+#' resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate()
 #' }
-pipeline_bagging = function(graph, iterations = 10, frac = 0.7, averager = NULL) {
+pipeline_bagging = function(graph, iterations = 10, frac = 0.7, averager = NULL, replace = FALSE) {
   g = as_graph(graph)
   assert_count(iterations)
   assert_number(frac, lower = 0, upper = 1)
@@ -50,7 +59,7 @@ pipeline_bagging = function(graph, iterations = 10, frac = 0.7, averager = NULL)
   }

   po("replicate", param_vals = list(reps = iterations)) %>>!%
-    po("subsample", param_vals = list(frac = frac)) %>>!%
+    po("subsample", param_vals = list(frac = frac, replace = replace)) %>>!%
     g %>>!%
     averager
 }
diff --git a/man/mlr_graphs_bagging.Rd b/man/mlr_graphs_bagging.Rd
index 42828ef94..186ac71ff 100644
--- a/man/mlr_graphs_bagging.Rd
+++ b/man/mlr_graphs_bagging.Rd
@@ -5,7 +5,13 @@
 \alias{pipeline_bagging}
 \title{Create a bagging learner}
 \usage{
-pipeline_bagging(graph, iterations = 10, frac = 0.7, averager = NULL)
+pipeline_bagging(
+  graph,
+  iterations = 10,
+  frac = 0.7,
+  averager = NULL,
+  replace = FALSE
+)
 }
 \arguments{
 \item{graph}{\code{\link{PipeOp}} | \code{\link{Graph}} \cr
@@ -27,6 +33,10 @@ in order to perform simple averaging of classification and regression
 predictions respectively.
 If \code{NULL} (default), no averager is added to the end of the graph.
 Note that setting \code{collect_multipliciy = TRUE} during construction of the averager is required.}
+
+\item{replace}{\code{logical(1)} \cr
+Whether to sample with replacement.
+Default \code{FALSE}.}
 }
 \value{
 \code{\link{Graph}}
 }
@@ -49,6 +59,12 @@ library(mlr3)
 lrn_po = po("learner", lrn("regr.rpart"))
 task = mlr_tasks$get("boston_housing")
 gr = pipeline_bagging(lrn_po, 3, averager = po("regravg", collect_multiplicity = TRUE))
-resample(task, GraphLearner$new(gr), rsmp("holdout"))
+resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate()
+
+# The original bagging method uses bootstrapping, i.e. sampling with replacement.
+# This may give better performance but is also slower.
+gr = ppl("bagging", lrn_po, frac = 1, replace = TRUE, + averager = po("regravg", collect_multiplicity = TRUE)) +resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() } } From b1713faa0b9de29722d72a624a9e5d73b932791a Mon Sep 17 00:00:00 2001 From: mb706 Date: Fri, 16 Jun 2023 01:49:38 +0200 Subject: [PATCH 02/46] news entry --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index a1074398a..5e13c1044 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # mlr3pipelines 0.5.0-9000 +* `pipeline_bagging()` gets the `replace` argument. + # mlr3pipelines 0.5.0-1 * Bugfix: `PipeOpTuneThreshold` was not overloading the correct `.train` and `.predict` functions. From 961c3b213c6bf68e410e7601346ea7ee13a0808a Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Wed, 11 Oct 2023 15:44:41 +0200 Subject: [PATCH 03/46] feat: keep_results can be character vector of IDs This can be useful when wanting access to an object that is not an output node of the graph, i.e. we don't have to add a `PipeOpNOP` (or keep all results) to achieve this. --- NEWS.md | 3 +++ R/Graph.R | 5 +++-- R/PipeOp.R | 2 +- man/Graph.Rd | 3 ++- man/PipeOp.Rd | 2 +- man/mlr_pipeops_nmf.Rd | 2 +- tests/testthat/test_Graph.R | 11 +++++++++++ 7 files changed, 22 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index a1074398a..7b147489e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # mlr3pipelines 0.5.0-9000 +* Feature: The `Graph`'s `keep_results` can now also be a character vector +containing the IDs of the `PipeOp`s whose results are being stored. + # mlr3pipelines 0.5.0-1 * Bugfix: `PipeOpTuneThreshold` was not overloading the correct `.train` and `.predict` functions. diff --git a/R/Graph.R b/R/Graph.R index 8cc95a0ae..b9070ffc1 100644 --- a/R/Graph.R +++ b/R/Graph.R @@ -59,8 +59,9 @@ #' * `phash` :: `character(1)` \cr #' Stores a checksum calculated on the [`Graph`] configuration, which includes all [`PipeOp`] hashes #' *except* their `$param_set$values`, and a hash of `$edges`. -#' * `keep_results` :: `logical(1)` \cr +#' * `keep_results` :: `logical(1)` or `character()` \cr #' Whether to store intermediate results in the [`PipeOp`]'s `$.result` slot, mostly for debugging purposes. Default `FALSE`. +#' Can also be a character vector of IDs, in which case only the results of the selected `PipeOp`s are stored. #' * `man` :: `character(1)`\cr #' Identifying string of the help page that shows with `help()`. #' @@ -642,7 +643,7 @@ graph_reduce = function(self, input, fun, single_input) { lg$debug("Running PipeOp '%s$%s()'", id, fun, pipeop = op, input = input) output = op[[fun]](input) - if (self$keep_results) { + if (isTRUE(self$keep_results) || op$id %in% self$keep_results) { op$.result = output } diff --git a/R/PipeOp.R b/R/PipeOp.R index 24a12ab6c..c9b393e70 100644 --- a/R/PipeOp.R +++ b/R/PipeOp.R @@ -130,7 +130,7 @@ #' [`PipeOp`]'s functionality may change depending on more than these values, it should inherit the `$hash` active #' binding and calculate the hash as `digest(list(super$hash, ), algo = "xxhash64")`. #' * `.result` :: `list` \cr -#' If the [`Graph`]'s `$keep_results` flag is set to `TRUE`, then the intermediate Results of `$train()` and `$predict()` +#' If the [`Graph`]'s `$keep_results` flag is set to `TRUE` or contains the ID of this `PipeOp`, then the intermediate Results of `$train()` and `$predict()` #' are saved to this slot, exactly as they are returned by these functions. 
This is mainly for debugging purposes #' and done, if requested, by the [`Graph`] backend itself; it should *not* be done explicitly by `private$.train()` or `private$.predict()`. #' * `man` :: `character(1)`\cr diff --git a/man/Graph.Rd b/man/Graph.Rd index 8da82dac3..fa51c234e 100644 --- a/man/Graph.Rd +++ b/man/Graph.Rd @@ -69,8 +69,9 @@ Stores a checksum calculated on the \code{\link{Graph}} configuration, which inc \item \code{phash} :: \code{character(1)} \cr Stores a checksum calculated on the \code{\link{Graph}} configuration, which includes all \code{\link{PipeOp}} hashes \emph{except} their \verb{$param_set$values}, and a hash of \verb{$edges}. -\item \code{keep_results} :: \code{logical(1)} \cr +\item \code{keep_results} :: \code{logical(1)} or \code{character()} \cr Whether to store intermediate results in the \code{\link{PipeOp}}'s \verb{$.result} slot, mostly for debugging purposes. Default \code{FALSE}. +Can also be a character vector of IDs, in which case only the results of the selected \code{PipeOp}s are stored. \item \code{man} :: \code{character(1)}\cr Identifying string of the help page that shows with \code{help()}. } diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index 4292943b1..553ca0301 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -137,7 +137,7 @@ Checksum calculated on the \code{\link{PipeOp}}, depending on the \code{\link{Pi \code{\link{PipeOp}}'s functionality may change depending on more than these values, it should inherit the \verb{$hash} active binding and calculate the hash as \verb{digest(list(super$hash, ), algo = "xxhash64")}. \item \code{.result} :: \code{list} \cr -If the \code{\link{Graph}}'s \verb{$keep_results} flag is set to \code{TRUE}, then the intermediate Results of \verb{$train()} and \verb{$predict()} +If the \code{\link{Graph}}'s \verb{$keep_results} flag is set to \code{TRUE} or contains the ID of this \code{PipeOp}, then the intermediate Results of \verb{$train()} and \verb{$predict()} are saved to this slot, exactly as they are returned by these functions. This is mainly for debugging purposes and done, if requested, by the \code{\link{Graph}} backend itself; it should \emph{not} be done explicitly by \code{private$.train()} or \code{private$.predict()}. \item \code{man} :: \code{character(1)}\cr diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index 5e967fab2..3c8a75c9a 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -96,7 +96,7 @@ See \code{\link[NMF:nmf]{nmf()}}. \section{Internals}{ -Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis-coef-methods]{basis()}}, \code{\link[NMF:basis-coef-methods]{coef()}} and +Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis]{basis()}}, \code{\link[NMF:coef]{coef()}} and \code{\link[MASS:ginv]{ginv()}}. 
 }
diff --git a/tests/testthat/test_Graph.R b/tests/testthat/test_Graph.R
index ed6f900d8..af40c8f15 100644
--- a/tests/testthat/test_Graph.R
+++ b/tests/testthat/test_Graph.R
@@ -501,3 +501,14 @@ test_that("Same output into multiple channels does not cause a bug", {
   expect_true(res$po3.output1 == 2)
   expect_true(res$po4.output1 == 2)
 })
+
+test_that("keep_results can be a character vector", {
+  graph = po("pca") %>>% po("ica")
+
+  graph$keep_results = "pca"
+
+  graph$train(tsk("iris"))
+
+  expect_true(is.null(graph$pipeops$ica$.result))
+  expect_class(graph$pipeops$pca$.result[[1L]], "Task")
+})

From 6af2a0525f947354b1084cf93ce0fdfee36b86ba Mon Sep 17 00:00:00 2001
From: Sebastian Fischer
Date: Fri, 27 Oct 2023 17:39:30 +0200
Subject: [PATCH 04/46] fix(POFU): columns named "x" were sometimes dropped

The "voodoo" variable was named "x". When another variable was named "x",
the subsetting to unique names kept only the first occurrence of "x".
---
 R/PipeOpFeatureUnion.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/PipeOpFeatureUnion.R b/R/PipeOpFeatureUnion.R
index 8a2c1b4ce..bbc1e890b 100644
--- a/R/PipeOpFeatureUnion.R
+++ b/R/PipeOpFeatureUnion.R
@@ -202,6 +202,7 @@ cbind_tasks = function(inputs, assert_targets_equal, inprefix) {
   # again done by reference
   new_features = unlist(c(list(data.table(x = vector(length = task$nrow))),
     map(tail(inputs, -1L), .f = function(y) y$data(ids, cols = y$feature_names))), recursive = FALSE)
+  names(new_features)[1] = make.unique(rev(names(new_features)))[[length(new_features)]]
   # we explicitly have to subset to the unique column names, otherwise task$cbind() complains for data.table backends
   new_features = new_features[unique(names(new_features))]

From 3ff31a4ec98ed91522ef2cd85e3525a8af20338a Mon Sep 17 00:00:00 2001
From: Sebastian Fischer
Date: Tue, 31 Oct 2023 20:16:16 +0100
Subject: [PATCH 05/46] tests: add test for pofu dropping 'x'

---
 tests/testthat/test_pipeop_featureunion.R | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/testthat/test_pipeop_featureunion.R b/tests/testthat/test_pipeop_featureunion.R
index 9687d1afb..f408fb196 100644
--- a/tests/testthat/test_pipeop_featureunion.R
+++ b/tests/testthat/test_pipeop_featureunion.R
@@ -257,3 +257,18 @@ test_that("featureunion - cbind_tasks - duplicates", {
   expect_equal(output$data(cols = "x"), inputs[[1L]]$data(cols = "x"))
   expect_equivalent(output$data(cols = c("Species", new_iris_names)), task1$data())
 })
+
+test_that("featureunion - does not drop 'x' column", {
+  task1 = as_task_regr(data.table(
+    z = 1:10,
+    y = 1:10
+  ), target = "y")
+
+  task2 = as_task_regr(data.table(
+    x = 1:10,
+    y = 1:10
+  ), target = "y")
+
+  taskout = po("featureunion")$train(list(task1, task2))[[1L]]
+  expect_permutation(taskout$feature_names, c("x", "z"))
+})

From bed0f07d294b73b863b8d5e3ce984a81d7638d73 Mon Sep 17 00:00:00 2001
From: Sebastian Fischer
Date: Wed, 8 Nov 2023 18:00:40 +0100
Subject: [PATCH 06/46] feat(Graph): add_pipeop gets argument 'clone'

---
 NEWS.md | 3 +++
 R/Graph.R | 11 ++++++-----
 man/Graph.Rd | 7 ++++---
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 7b147489e..940986bc2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,9 @@
 * Feature: The `Graph`'s `keep_results` can now also be a character vector
 containing the IDs of the `PipeOp`s whose results are being stored.
+* Feature: The `$add_pipeop()` method got an argument `clone` that is `TRUE` by +default (previously `PipeOp`s were always cloned) +* Bugfix: `PipeOpFeatureUnion` in some rare cases dropped variables called `"x"` # mlr3pipelines 0.5.0-1 diff --git a/R/Graph.R b/R/Graph.R index b9070ffc1..f4fc796ad 100644 --- a/R/Graph.R +++ b/R/Graph.R @@ -70,13 +70,14 @@ #' (`logical(1)`) -> `character` \cr #' Get IDs of all [`PipeOp`]s. This is in order that [`PipeOp`]s were added if #' `sorted` is `FALSE`, and topologically sorted if `sorted` is `TRUE`. -#' * `add_pipeop(op)` \cr -#' ([`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr +#' * `add_pipeop(op, clone = TRUE)` \cr +#' ([`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`, `logical(1)`) -> `self` \cr #' Mutates [`Graph`] by adding a [`PipeOp`] to the [`Graph`]. This does not add any edges, so the new [`PipeOp`] #' will not be connected within the [`Graph`] at first.\cr #' Instead of supplying a [`PipeOp`] directly, an object that can naturally be converted to a [`PipeOp`] can also #' be supplied, e.g. a [`Learner`][mlr3::Learner] or a [`Filter`][mlr3filters::Filter]; see [`as_pipeop()`]. -#' The argument given as `op` is always cloned; to access a `Graph`'s [`PipeOp`]s by-reference, use `$pipeops`.\cr +#' The argument given as `op` is cloned if `clone` is `TRUE` (default); to access a `Graph`'s [`PipeOp`]s +#' by-reference, use `$pipeops`.\cr #' Note that `$add_pipeop()` is a relatively low-level operation, it is recommended to build graphs using [`%>>%`]. #' * `add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)` \cr #' (`character(1)`, `character(1)`, @@ -182,8 +183,8 @@ Graph = R6Class("Graph", topo_sort(tmp)$id }, - add_pipeop = function(op) { - op = as_pipeop(op, clone = TRUE) + add_pipeop = function(op, clone = TRUE) { + op = as_pipeop(op, clone = assert_flag(clone)) if (op$id %in% names(self$pipeops)) { stopf("PipeOp with id '%s' already in Graph", op$id) } diff --git a/man/Graph.Rd b/man/Graph.Rd index fa51c234e..0db8e24a6 100644 --- a/man/Graph.Rd +++ b/man/Graph.Rd @@ -84,13 +84,14 @@ Identifying string of the help page that shows with \code{help()}. (\code{logical(1)}) -> \code{character} \cr Get IDs of all \code{\link{PipeOp}}s. This is in order that \code{\link{PipeOp}}s were added if \code{sorted} is \code{FALSE}, and topologically sorted if \code{sorted} is \code{TRUE}. -\item \code{add_pipeop(op)} \cr -(\code{\link{PipeOp}} | \code{\link[mlr3:Learner]{Learner}} | \code{\link[mlr3filters:Filter]{Filter}} | \code{...}) -> \code{self} \cr +\item \code{add_pipeop(op, clone = TRUE)} \cr +(\code{\link{PipeOp}} | \code{\link[mlr3:Learner]{Learner}} | \code{\link[mlr3filters:Filter]{Filter}} | \code{...}, \code{logical(1)}) -> \code{self} \cr Mutates \code{\link{Graph}} by adding a \code{\link{PipeOp}} to the \code{\link{Graph}}. This does not add any edges, so the new \code{\link{PipeOp}} will not be connected within the \code{\link{Graph}} at first.\cr Instead of supplying a \code{\link{PipeOp}} directly, an object that can naturally be converted to a \code{\link{PipeOp}} can also be supplied, e.g. a \code{\link[mlr3:Learner]{Learner}} or a \code{\link[mlr3filters:Filter]{Filter}}; see \code{\link[=as_pipeop]{as_pipeop()}}. 
-The argument given as \code{op} is always cloned; to access a \code{Graph}'s \code{\link{PipeOp}}s by-reference, use \verb{$pipeops}.\cr +The argument given as \code{op} is cloned if \code{clone} is \code{TRUE} (default); to access a \code{Graph}'s \code{\link{PipeOp}}s +by-reference, use \verb{$pipeops}.\cr Note that \verb{$add_pipeop()} is a relatively low-level operation, it is recommended to build graphs using \code{\link{\%>>\%}}. \item \code{add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)} \cr (\code{character(1)}, \code{character(1)}, From 5c98e886c9c7ac1b01fa2ce0b4b0e34d384ee79a Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Wed, 8 Nov 2023 18:23:40 +0100 Subject: [PATCH 07/46] fix: as_graph respects clone argument for PipeOp method --- R/assert_graph.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/assert_graph.R b/R/assert_graph.R index 03c723e14..828f4b4fc 100644 --- a/R/assert_graph.R +++ b/R/assert_graph.R @@ -40,7 +40,7 @@ as_graph = function(x, clone = FALSE) { #' @export as_graph.default = function(x, clone = FALSE) { - Graph$new()$add_pipeop(x) # add_pipeop always clones and checks automatically for convertability + Graph$new()$add_pipeop(x, clone = clone) } #' @export From 43b02ab1fe6a2d230b96050ef1131a810c6a7f2e Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Wed, 22 Nov 2023 11:37:38 +0100 Subject: [PATCH 08/46] add extending vignette to pkgdown --- pkgdown/_pkgdown.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 9e556cf79..1f3a711ca 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -35,6 +35,9 @@ navbar: rss: icon: fa-rss href: https://mlr-org.com/ + extending: + text: Extending + href: extending.html reference: - title: Package From d223981060c5ca8d0e40796571f98a693a9358dd Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 13 Jan 2024 20:47:32 +0100 Subject: [PATCH 09/46] check with new paradox --- .github/workflows/r-cmd-check-paradox.yml | 44 +++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/workflows/r-cmd-check-paradox.yml diff --git a/.github/workflows/r-cmd-check-paradox.yml b/.github/workflows/r-cmd-check-paradox.yml new file mode 100644 index 000000000..784340558 --- /dev/null +++ b/.github/workflows/r-cmd-check-paradox.yml @@ -0,0 +1,44 @@ +# r cmd check workflow of the mlr3 ecosystem v0.1.0 +# https://github.com/mlr-org/actions +on: + workflow_dispatch: + push: + branches: + - main + pull_request: + branches: + - main + +name: r-cmd-check-paradox + +jobs: + r-cmd-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'devel'} + - {os: ubuntu-latest, r: 'release'} + + steps: + - uses: actions/checkout@v3 + + - name: paradox + run: 'echo -e "Remotes:\n mlr-org/paradox@s3_params,\n mlr-org/mlr3learners@s3params_compat,\n mlr-org/mlr3filters@s3params_compat" >> DESCRIPTION' + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + - uses: r-lib/actions/check-r-package@v2 From 18f8d3e00d53ad8e0a7717b4e464b19b518e9834 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 13 Jan 2024 22:54:18 +0100 Subject: [PATCH 10/46] preparing --- R/PipeOp.R | 4 ++-- R/PipeOpFilter.R | 16 ++++++++++++---- 
R/PipeOpImputeLearner.R | 4 +++- R/PipeOpLearner.R | 4 +++- R/PipeOpLearnerCV.R | 10 +++++++--- R/zzz.R | 5 ++++- 6 files changed, 31 insertions(+), 12 deletions(-) diff --git a/R/PipeOp.R b/R/PipeOp.R index 24a12ab6c..37b84e600 100644 --- a/R/PipeOp.R +++ b/R/PipeOp.R @@ -338,7 +338,7 @@ PipeOp = R6Class("PipeOp", id = function(val) { if (!missing(val)) { private$.id = val - if (!is.null(private$.param_set)) { + if (paradox_info$is_old && !is.null(private$.param_set)) { # private$.param_set may be NULL if it is constructed dynamically by active binding private$.param_set$set_id = val } @@ -353,7 +353,7 @@ PipeOp = R6Class("PipeOp", } else { private$.param_set = sourcelist[[1]] } - if (!is.null(self$id)) { + if (paradox_info$is_old && !is.null(self$id)) { private$.param_set$set_id = self$id } } diff --git a/R/PipeOpFilter.R b/R/PipeOpFilter.R index 06d4167a6..2f94d0cdc 100644 --- a/R/PipeOpFilter.R +++ b/R/PipeOpFilter.R @@ -114,16 +114,24 @@ PipeOpFilter = R6Class("PipeOpFilter", initialize = function(filter, id = filter$id, param_vals = list()) { assert_class(filter, "Filter") self$filter = filter$clone(deep = TRUE) - self$filter$param_set$set_id = "" - map(self$filter$param_set$params, function(p) p$tags = union(p$tags, "train")) + if (paradox_info$is_old) { + self$filter$param_set$set_id = "" + map(self$filter$param_set$params, function(p) p$tags = union(p$tags, "train")) + } else { + for (pn in self$filter$param_set$ids()) { + self$filter$param_set$tags[[pn]] = union(self$filter$param_set$tags[[pn]] , "train") + } + } private$.outer_param_set = ParamSet$new(list( ParamInt$new("nfeat", lower = 0, tags = "train"), ParamDbl$new("frac", lower = 0, upper = 1, tags = "train"), ParamDbl$new("cutoff", tags = "train"), ParamInt$new("permuted", lower = 1, tags = "train") )) - private$.outer_param_set$set_id = "filter" - super$initialize(id, alist(private$.outer_param_set, self$filter$param_set), param_vals = param_vals, tags = "feature selection") + if (paradox_info$is_old) { + private$.outer_param_set$set_id = "filter" + } + super$initialize(id, alist(filter = private$.outer_param_set, self$filter$param_set), param_vals = param_vals, tags = "feature selection") } ), private = list( diff --git a/R/PipeOpImputeLearner.R b/R/PipeOpImputeLearner.R index 611dafb5c..8c29b0a14 100644 --- a/R/PipeOpImputeLearner.R +++ b/R/PipeOpImputeLearner.R @@ -101,7 +101,9 @@ PipeOpImputeLearner = R6Class("PipeOpImputeLearner", public = list( initialize = function(learner, id = "imputelearner", param_vals = list()) { private$.learner = as_learner(learner, clone = TRUE) - private$.learner$param_set$set_id = "" + if (paradox_info$is_old) { + private$.learner$param_set$set_id = "" + } id = id %??% private$.learner$id feature_types = switch(private$.learner$task_type, regr = c("integer", "numeric"), diff --git a/R/PipeOpLearner.R b/R/PipeOpLearner.R index fa09fde79..d8797a976 100644 --- a/R/PipeOpLearner.R +++ b/R/PipeOpLearner.R @@ -100,7 +100,9 @@ PipeOpLearner = R6Class("PipeOpLearner", inherit = PipeOp, id = function(val) { if (!missing(val)) { private$.id = val - private$.learner$param_set$set_id = val + if (paradox_info$is_old) { + private$.learner$param_set$set_id = val + } } private$.id }, diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index b011f89b5..d988f9a00 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -116,7 +116,9 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", public = list( initialize = function(learner, id = NULL, param_vals = list()) { private$.learner = as_learner(learner, 
clone = TRUE) - private$.learner$param_set$set_id = "" + if (paradox_info$is_old) { + private$.learner$param_set$set_id = "" + } id = id %??% private$.learner$id # FIXME: can be changed when mlr-org/mlr3#470 has an answer type = private$.learner$task_type @@ -128,7 +130,9 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", ParamLgl$new("keep_response", tags = c("train", "required")) )) private$.crossval_param_set$values = list(method = "cv", folds = 3, keep_response = FALSE) - private$.crossval_param_set$set_id = "resampling" + if (paradox_info$is_old) { + private$.crossval_param_set$set_id = "resampling" + } # Dependencies in paradox have been broken from the start and this is known since at least a year: # https://github.com/mlr-org/paradox/issues/216 # The following would make it _impossible_ to set "method" to "insample", because then "folds" @@ -137,7 +141,7 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", # in PipeOp ParamSets. # private$.crossval_param_set$add_dep("folds", "method", CondEqual$new("cv")) # don't do this. - super$initialize(id, alist(private$.crossval_param_set, private$.learner$param_set), param_vals = param_vals, can_subset_cols = TRUE, task_type = task_type, tags = c("learner", "ensemble")) + super$initialize(id, alist(resampling = private$.crossval_param_set, private$.learner$param_set), param_vals = param_vals, can_subset_cols = TRUE, task_type = task_type, tags = c("learner", "ensemble")) } ), diff --git a/R/zzz.R b/R/zzz.R index 0573770ba..f811affc9 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -18,6 +18,8 @@ register_mlr3 = function() { "multiplicity"))) } +paradox_info <- list2env(list(is_old = FALSE), parent = emptyenv()) + .onLoad = function(libname, pkgname) { # nocov start register_mlr3() setHook(packageEvent("mlr3", "onLoad"), function(...) 
register_mlr3(), action = "append") @@ -27,6 +29,7 @@ register_mlr3 = function() { if (Sys.getenv("IN_PKGDOWN") == "true") { lg$set_threshold("warn") } + paradox_info$is_old = !is.null(ps()$set_id) } # nocov end .onUnload = function(libpath) { # nocov start @@ -39,4 +42,4 @@ register_mlr3 = function() { # static code checks should not complain about commonly used data.table columns utils::globalVariables(c("src_id", "dst_id", "name", "op.id", "response", "truth")) -leanify_package() +# leanify_package() From 6501a6c690f57d18c344dd9b776e68b76e768149 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 13 Jan 2024 23:00:33 +0100 Subject: [PATCH 11/46] beginning to use new paradox syntax --- R/LearnerAvg.R | 14 +++++----- R/PipeOpBoxCox.R | 8 +++--- R/PipeOpChunk.R | 2 +- R/PipeOpClassBalancing.R | 4 +-- R/PipeOpClassWeights.R | 2 +- R/PipeOpColApply.R | 2 +- R/PipeOpColRoles.R | 2 +- R/PipeOpCollapseFactors.R | 4 +-- R/PipeOpDateFeatures.R | 24 ++++++++--------- R/PipeOpEncode.R | 2 +- R/PipeOpEncodeImpact.R | 4 +-- R/PipeOpEncodeLmer.R | 2 +- R/PipeOpFilter.R | 10 +++---- R/PipeOpFixFactors.R | 2 +- R/PipeOpHistBin.R | 2 +- R/PipeOpICA.R | 20 +++++++------- R/PipeOpImputeConstant.R | 4 +-- R/PipeOpImputeOOR.R | 6 ++--- R/PipeOpKernelPCA.R | 10 +++---- R/PipeOpLearnerCV.R | 6 ++--- R/PipeOpMissingIndicators.R | 6 ++--- R/PipeOpModelMatrix.R | 2 +- R/PipeOpMultiplicity.R | 2 +- R/PipeOpMutate.R | 4 +-- R/PipeOpNMF.R | 28 +++++++++---------- R/PipeOpPCA.R | 6 ++--- R/PipeOpProxy.R | 2 +- R/PipeOpQuantileBin.R | 2 +- R/PipeOpRandomProjection.R | 4 +-- R/PipeOpRandomResponse.R | 2 +- R/PipeOpRemoveConstants.R | 10 +++---- R/PipeOpRenameColumns.R | 4 +-- R/PipeOpScale.R | 6 ++--- R/PipeOpScaleMaxAbs.R | 2 +- R/PipeOpScaleRange.R | 4 +-- R/PipeOpSelect.R | 2 +- R/PipeOpSmote.R | 4 +-- R/PipeOpSpatialSign.R | 4 +-- R/PipeOpSubsample.R | 6 ++--- R/PipeOpTaskPreproc.R | 2 +- R/PipeOpTextVectorizer.R | 54 ++++++++++++++++++------------------- R/PipeOpTrafo.R | 18 ++++++------- R/PipeOpTuneThreshold.R | 8 +++--- R/PipeOpVtreat.R | 46 +++++++++++++++---------------- R/PipeOpYeoJohnson.R | 8 +++--- 45 files changed, 183 insertions(+), 183 deletions(-) diff --git a/R/LearnerAvg.R b/R/LearnerAvg.R index fd2be1883..687c36492 100644 --- a/R/LearnerAvg.R +++ b/R/LearnerAvg.R @@ -59,9 +59,9 @@ LearnerClassifAvg = R6Class("LearnerClassifAvg", inherit = LearnerClassif, public = list( initialize = function(id = "classif.avg") { ps = ParamSet$new(params = list( - ParamUty$new("measure", custom_check = check_class_or_character("MeasureClassif", mlr_measures), tags = "train"), - ParamUty$new("optimizer", custom_check = check_optimizer, tags = "train"), - ParamUty$new("log_level", tags = "train", + measure = p_uty(custom_check = check_class_or_character("MeasureClassif", mlr_measures), tags = "train"), + optimizer = p_uty(custom_check = check_optimizer, tags = "train"), + log_level = p_uty(tags = "train", function(x) check_string(x) %check||% check_integerish(x)) )) ps$values = list(measure = "classif.ce", optimizer = "nloptr", log_level = "warn") @@ -133,9 +133,9 @@ LearnerRegrAvg = R6Class("LearnerRegrAvg", inherit = LearnerRegr, public = list( initialize = function(id = "regr.avg") { ps = ParamSet$new(params = list( - ParamUty$new("measure", custom_check = check_class_or_character("MeasureRegr", mlr_measures), tags = "train"), - ParamUty$new("optimizer", custom_check = check_optimizer, tags = "train"), - ParamUty$new("log_level", tags = "train", + measure = p_uty(custom_check = check_class_or_character("MeasureRegr", 
mlr_measures), tags = "train"), + optimizer = p_uty(custom_check = check_optimizer, tags = "train"), + log_level = p_uty(tags = "train", function(x) check_string(x) %check||% check_integerish(x)) )) ps$values = list(measure = "regr.mse", optimizer = "nloptr", log_level = "warn") @@ -198,7 +198,7 @@ optimize_weights_learneravg = function(self, task, n_weights, data) { } measure = pars$measure if (is.character(measure)) measure = msr(measure) - codomain = ParamSet$new(list(ParamDbl$new(id = measure$id, tags = ifelse(measure$minimize, "minimize", "maximize")))) + codomain = ParamSet$new(params = list(ParamDbl$new(id = measure$id, tags = ifelse(measure$minimize, "minimize", "maximize")))) objfun = bbotk::ObjectiveRFun$new( fun = function(xs) learneravg_objfun(xs, task = task, measure = measure, avg_weight_fun = self$weighted_average_prediction, data = data), domain = ps, codomain = codomain diff --git a/R/PipeOpBoxCox.R b/R/PipeOpBoxCox.R index 19034d9dc..24d140ec4 100644 --- a/R/PipeOpBoxCox.R +++ b/R/PipeOpBoxCox.R @@ -66,10 +66,10 @@ PipeOpBoxCox = R6Class("PipeOpBoxCox", public = list( initialize = function(id = "boxcox", param_vals = list()) { ps = ParamSet$new(params = list( - ParamLgl$new("standardize", default = TRUE, tags = c("train", "boxcox")), - ParamDbl$new("eps", default = 0.001, lower = 0, tags = c("train", "boxcox")), - ParamDbl$new("lower", tags = c("train", "boxcox")), - ParamDbl$new("upper", tags = c("train", "boxcox")) + standardize = p_lgl(default = TRUE, tags = c("train", "boxcox")), + eps = p_dbl(default = 0.001, lower = 0, tags = c("train", "boxcox")), + lower = p_dbl(tags = c("train", "boxcox")), + upper = p_dbl(tags = c("train", "boxcox")) )) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "bestNormalize", feature_types = c("numeric", "integer")) diff --git a/R/PipeOpChunk.R b/R/PipeOpChunk.R index 57b69ad7a..8bb7543ba 100644 --- a/R/PipeOpChunk.R +++ b/R/PipeOpChunk.R @@ -65,7 +65,7 @@ PipeOpChunk = R6Class("PipeOpChunk", initialize = function(outnum, id = "chunk", param_vals = list()) { outnum = assert_int(outnum, lower = 1L) ps = ParamSet$new(params = list( - ParamLgl$new("shuffle", tags = "train") + shuffle = p_lgl(tags = "train") )) ps$values = list(shuffle = TRUE) super$initialize(id, diff --git a/R/PipeOpClassBalancing.R b/R/PipeOpClassBalancing.R index 507512f6f..20232e34e 100644 --- a/R/PipeOpClassBalancing.R +++ b/R/PipeOpClassBalancing.R @@ -105,12 +105,12 @@ PipeOpClassBalancing = R6Class("PipeOpClassBalancing", public = list( initialize = function(id = "classbalancing", param_vals = list()) { ps = ParamSet$new(params = list( - ParamDbl$new("ratio", lower = 0, upper = Inf, tags = "train"), + ratio = p_dbl(lower = 0, upper = Inf, tags = "train"), ParamFct$new("reference", levels = c("all", "major", "minor", "nonmajor", "nonminor", "one"), tags = "train"), ParamFct$new("adjust", levels = c("all", "major", "minor", "nonmajor", "nonminor", "upsample", "downsample"), tags = "train"), - ParamLgl$new("shuffle", tags = "train") + shuffle = p_lgl(tags = "train") )) ps$values = list(ratio = 1, reference = "all", adjust = "all", shuffle = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data") diff --git a/R/PipeOpClassWeights.R b/R/PipeOpClassWeights.R index 802af61da..ce572d037 100644 --- a/R/PipeOpClassWeights.R +++ b/R/PipeOpClassWeights.R @@ -72,7 +72,7 @@ PipeOpClassWeights = R6Class("PipeOpClassWeights", public = list( initialize = function(id = 
"classweights", param_vals = list()) { ps = ParamSet$new(params = list( - ParamDbl$new("minor_weight", lower = 0, upper = Inf, tags = "train") + minor_weight = p_dbl(lower = 0, upper = Inf, tags = "train") )) ps$values = list(minor_weight = 1) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data") diff --git a/R/PipeOpColApply.R b/R/PipeOpColApply.R index f4366149d..699c71fca 100644 --- a/R/PipeOpColApply.R +++ b/R/PipeOpColApply.R @@ -93,7 +93,7 @@ PipeOpColApply = R6Class("PipeOpColApply", public = list( initialize = function(id = "colapply", param_vals = list()) { ps = ParamSet$new(params = list( - ParamUty$new("applicator", custom_check = check_function, tags = c("train", "predict")) + applicator = p_uty(custom_check = check_function, tags = c("train", "predict")) )) ps$values = list(applicator = identity) super$initialize(id, ps, param_vals = param_vals) diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index 39173a54e..f7bd0bcef 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -58,7 +58,7 @@ PipeOpColRoles = R6Class("PipeOpColRoles", initialize = function(id = "colroles", param_vals = list()) { ps = ParamSet$new(params = list( # named list, each entry with a vector of roles - ParamUty$new("new_role", tags = c("train", "predict"), custom_check = function(x) { + new_role = p_uty(tags = c("train", "predict"), custom_check = function(x) { first_check = check_list(x, types = "character", any.missing = FALSE, min.len = 1L, names = "named") # return the error directly if this failed if (is.character(first_check)) { diff --git a/R/PipeOpCollapseFactors.R b/R/PipeOpCollapseFactors.R index c0b94bee7..f11887bc2 100644 --- a/R/PipeOpCollapseFactors.R +++ b/R/PipeOpCollapseFactors.R @@ -60,8 +60,8 @@ PipeOpCollapseFactors = R6Class("PipeOpCollapseFactors", public = list( initialize = function(id = "collapsefactors", param_vals = list()) { ps = ParamSet$new(params = list( - ParamDbl$new("no_collapse_above_prevalence", 0, 1, tags = c("train", "predict")), - ParamInt$new("target_level_count", 2, tags = c("train", "predict")) + no_collapse_above_prevalence = p_dbl(0, 1, tags = c("train", "predict")), + target_level_count = p_int(2, tags = c("train", "predict")) )) ps$values = list(no_collapse_above_prevalence = 1, target_level_count = 2) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("factor", "ordered")) diff --git a/R/PipeOpDateFeatures.R b/R/PipeOpDateFeatures.R index b49e8e5f1..93de36c38 100644 --- a/R/PipeOpDateFeatures.R +++ b/R/PipeOpDateFeatures.R @@ -99,18 +99,18 @@ PipeOpDateFeatures = R6Class("PipeOpDateFeatures", public = list( initialize = function(id = "datefeatures", param_vals = list()) { ps = ParamSet$new(params = list( - ParamLgl$new("keep_date_var", tags = c("train", "predict", "required")), - ParamLgl$new("cyclic", tags = c("train", "predict", "required")), - ParamLgl$new("year", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("month", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("week_of_year", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("day_of_year", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("day_of_month", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("day_of_week", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("hour", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("minute", 
tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("second", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("is_day", tags = c("train", "predict", "datepart", "required")) + keep_date_var = p_lgl(tags = c("train", "predict", "required")), + cyclic = p_lgl(tags = c("train", "predict", "required")), + year = p_lgl(tags = c("train", "predict", "datepart", "required")), + month = p_lgl(tags = c("train", "predict", "datepart", "required")), + week_of_year = p_lgl(tags = c("train", "predict", "datepart", "required")), + day_of_year = p_lgl(tags = c("train", "predict", "datepart", "required")), + day_of_month = p_lgl(tags = c("train", "predict", "datepart", "required")), + day_of_week = p_lgl(tags = c("train", "predict", "datepart", "required")), + hour = p_lgl(tags = c("train", "predict", "datepart", "required")), + minute = p_lgl(tags = c("train", "predict", "datepart", "required")), + second = p_lgl(tags = c("train", "predict", "datepart", "required")), + is_day = p_lgl(tags = c("train", "predict", "datepart", "required")) )) ps$values = list(keep_date_var = FALSE, cyclic = FALSE, year = TRUE, month = TRUE, week_of_year = TRUE, day_of_year = TRUE, day_of_month = TRUE, diff --git a/R/PipeOpEncode.R b/R/PipeOpEncode.R index 8addb74f9..df77bae4d 100644 --- a/R/PipeOpEncode.R +++ b/R/PipeOpEncode.R @@ -83,7 +83,7 @@ PipeOpEncode = R6Class("PipeOpEncode", public = list( initialize = function(id = "encode", param_vals = list()) { ps = ParamSet$new(params = list( - ParamFct$new("method", levels = c("one-hot", "treatment", "helmert", "poly", "sum"), tags = c("train", "predict")) + method = p_fct(levels = c("one-hot", "treatment", "helmert", "poly", "sum"), tags = c("train", "predict")) )) ps$values = list(method = "one-hot") super$initialize(id, param_set = ps, param_vals = param_vals, packages = "stats", tags = "encode", feature_types = c("factor", "ordered")) diff --git a/R/PipeOpEncodeImpact.R b/R/PipeOpEncodeImpact.R index 444f5911c..d719b150d 100644 --- a/R/PipeOpEncodeImpact.R +++ b/R/PipeOpEncodeImpact.R @@ -78,8 +78,8 @@ PipeOpEncodeImpact = R6Class("PipeOpEncodeImpact", public = list( initialize = function(id = "encodeimpact", param_vals = list()) { ps = ParamSet$new(params = list( - ParamDbl$new("smoothing", 0, Inf, tags = c("train", "required")), - ParamLgl$new("impute_zero", tags = c("train", "required")) + smoothing = p_dbl(0, Inf, tags = c("train", "required")), + impute_zero = p_lgl(tags = c("train", "required")) )) ps$values = list(smoothing = 1e-4, impute_zero = FALSE) super$initialize(id, param_set = ps, param_vals = param_vals, tags = "encode", feature_types = c("factor", "ordered")) diff --git a/R/PipeOpEncodeLmer.R b/R/PipeOpEncodeLmer.R index 5705cac2c..be834ed97 100644 --- a/R/PipeOpEncodeLmer.R +++ b/R/PipeOpEncodeLmer.R @@ -89,7 +89,7 @@ PipeOpEncodeLmer = R6Class("PipeOpEncodeLmer", public = list( initialize = function(id = "encodelmer", param_vals = list()) { ps = ParamSet$new(params = list( - ParamLgl$new("fast_optim", tags = c("train", "required")) + fast_optim = p_lgl(tags = c("train", "required")) )) ps$values = list(fast_optim = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, packages = c("lme4", "nloptr"), tags = "encode", feature_types = c("factor", "ordered")) diff --git a/R/PipeOpFilter.R b/R/PipeOpFilter.R index 2f94d0cdc..1a87f85f8 100644 --- a/R/PipeOpFilter.R +++ b/R/PipeOpFilter.R @@ -122,11 +122,11 @@ PipeOpFilter = R6Class("PipeOpFilter", self$filter$param_set$tags[[pn]] = 
union(self$filter$param_set$tags[[pn]] , "train") } } - private$.outer_param_set = ParamSet$new(list( - ParamInt$new("nfeat", lower = 0, tags = "train"), - ParamDbl$new("frac", lower = 0, upper = 1, tags = "train"), - ParamDbl$new("cutoff", tags = "train"), - ParamInt$new("permuted", lower = 1, tags = "train") + private$.outer_param_set = ParamSet$new(params = list( + nfeat = p_int(lower = 0, tags = "train"), + frac = p_dbl(lower = 0, upper = 1, tags = "train"), + cutoff = p_dbl(tags = "train"), + permuted = p_int(lower = 1, tags = "train") )) if (paradox_info$is_old) { private$.outer_param_set$set_id = "filter" diff --git a/R/PipeOpFixFactors.R b/R/PipeOpFixFactors.R index 937dea1d3..e5e20116e 100644 --- a/R/PipeOpFixFactors.R +++ b/R/PipeOpFixFactors.R @@ -52,7 +52,7 @@ PipeOpFixFactors = R6Class("PipeOpFixFactors", public = list( initialize = function(id = "fixfactors", param_vals = list()) { ps = ParamSet$new(params = list( - ParamLgl$new("droplevels", tags = c("train", "predict")) + droplevels = p_lgl(tags = c("train", "predict")) )) ps$values = list(droplevels = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, tags = "robustify", feature_types = c("factor", "ordered")) diff --git a/R/PipeOpHistBin.R b/R/PipeOpHistBin.R index f4924d0fd..a5c96d548 100644 --- a/R/PipeOpHistBin.R +++ b/R/PipeOpHistBin.R @@ -65,7 +65,7 @@ PipeOpHistBin = R6Class("PipeOpHistBin", public = list( initialize = function(id = "histbin", param_vals = list()) { ps = ParamSet$new(params = list( - ParamUty$new("breaks", default = "Sturges", tags = c("train", "hist")) + breaks = p_uty(default = "Sturges", tags = c("train", "hist")) )) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "graphics", feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpICA.R b/R/PipeOpICA.R index ff56a54fd..37f980782 100644 --- a/R/PipeOpICA.R +++ b/R/PipeOpICA.R @@ -91,17 +91,17 @@ PipeOpICA = R6Class("PipeOpICA", public = list( initialize = function(id = "ica", param_vals = list()) { ps = ParamSet$new(params = list( - ParamInt$new("n.comp", lower = 1, upper = Inf, tags = c("train", "ica")), - ParamFct$new("alg.typ", levels = c("parallel", "deflation"), + n.comp = p_int(lower = 1, upper = Inf, tags = c("train", "ica")), + alg.typ = p_fct(levels = c("parallel", "deflation"), default = "parallel", tags = c("train", "ica")), - ParamFct$new("fun", default = "logcosh", levels = c("logcosh", "exp"), tags = c("train", "ica")), - ParamDbl$new("alpha", default = 1.0, lower = 1, upper = 2, tags = c("train", "ica")), - ParamFct$new("method", default = "R", levels = c("C", "R"), tags = c("train", "ica")), - ParamLgl$new("row.norm", default = FALSE, tags = c("train", "ica")), - ParamInt$new("maxit", default = 200, lower = 1, tags = c("train", "ica")), - ParamDbl$new("tol", default = 1e-04, lower = 0, tags = c("train", "ica")), - ParamLgl$new("verbose", default = FALSE, tags = c("train", "ica")), - ParamUty$new("w.init", default = NULL, tags = c("train", "ica")) + fun = p_fct(default = "logcosh", levels = c("logcosh", "exp"), tags = c("train", "ica")), + alpha = p_dbl(default = 1.0, lower = 1, upper = 2, tags = c("train", "ica")), + method = p_fct(default = "R", levels = c("C", "R"), tags = c("train", "ica")), + row.norm = p_lgl(default = FALSE, tags = c("train", "ica")), + maxit = p_int(default = 200, lower = 1, tags = c("train", "ica")), + tol = p_dbl(default = 1e-04, lower = 0, tags = c("train", "ica")), + verbose = p_lgl(default = FALSE, tags = c("train", "ica")), + w.init = p_uty(default = 
NULL, tags = c("train", "ica")) )) ps$values = list(method = "C") super$initialize(id, param_set = ps, param_vals = param_vals, diff --git a/R/PipeOpImputeConstant.R b/R/PipeOpImputeConstant.R index 22aaa8431..a9dd12c67 100644 --- a/R/PipeOpImputeConstant.R +++ b/R/PipeOpImputeConstant.R @@ -70,8 +70,8 @@ PipeOpImputeConstant = R6Class("PipeOpImputeConstant", public = list( initialize = function(id = "imputeconstant", param_vals = list()) { ps = ParamSet$new(params = list( - ParamUty$new("constant", tags = c("train", "required"), custom_check = check_scalar), - ParamLgl$new("check_levels", tags = c("train", "required")) + constant = p_uty(tags = c("train", "required"), custom_check = check_scalar), + check_levels = p_lgl(tags = c("train", "required")) )) ps$values = list(constant = ".MISSING", check_levels = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered", "POSIXct")) diff --git a/R/PipeOpImputeOOR.R b/R/PipeOpImputeOOR.R index 4236e91c2..b224cab32 100644 --- a/R/PipeOpImputeOOR.R +++ b/R/PipeOpImputeOOR.R @@ -81,9 +81,9 @@ PipeOpImputeOOR = R6Class("PipeOpImputeOOR", public = list( initialize = function(id = "imputeoor", param_vals = list()) { ps = ParamSet$new(params = list( - ParamLgl$new("min", tags = c("train", "predict")), - ParamDbl$new("offset", lower = 0, tags = c("train", "predict")), - ParamDbl$new("multiplier", lower = 0, tags = c("train", "predict"))) + min = p_lgl(tags = c("train", "predict")), + offset = p_dbl(lower = 0, tags = c("train", "predict")), + multiplier = p_dbl(lower = 0, tags = c("train", "predict"))) ) ps$values = list(min = TRUE, offset = 1, multiplier = 1) # this is one of the few imputers that handles 'character' features! 
diff --git a/R/PipeOpKernelPCA.R b/R/PipeOpKernelPCA.R index 6b666ed32..da878e480 100644 --- a/R/PipeOpKernelPCA.R +++ b/R/PipeOpKernelPCA.R @@ -68,12 +68,12 @@ PipeOpKernelPCA = R6Class("PipeOpKernelPCA", public = list( initialize = function(id = "kernelpca", param_vals = list()) { ps = ParamSet$new(params = list( - ParamFct$new("kernel", default = "rbfdot", levels = c("rbfdot", "polydot", + kernel = p_fct(default = "rbfdot", levels = c("rbfdot", "polydot", "vanilladot", "tanhdot", "laplacedot", "besseldot", "anovadot", "splinedot"), tags = c("train", "kpca")), - ParamUty$new("kpar", tags = c("train", "kpca")), - ParamInt$new("features", default = 0, lower = 0, tags = c("train", "kpca")), - ParamDbl$new("th", default = 1e-04, lower = 0, tags = c("train", "kpca")), - ParamUty$new("na.action", default = stats::na.omit, tags = c("train", "kpca")) + kpar = p_uty(tags = c("train", "kpca")), + features = p_int(default = 0, lower = 0, tags = c("train", "kpca")), + th = p_dbl(default = 1e-04, lower = 0, tags = c("train", "kpca")), + na.action = p_uty(default = stats::na.omit, tags = c("train", "kpca")) )) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "kernlab", feature_types = c("numeric", "integer")) diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index d988f9a00..2e6e829f7 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -125,9 +125,9 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", task_type = mlr_reflections$task_types[type, mult = "first"]$task private$.crossval_param_set = ParamSet$new(params = list( - ParamFct$new("method", levels = c("cv", "insample"), tags = c("train", "required")), - ParamInt$new("folds", lower = 2L, upper = Inf, tags = c("train", "required")), - ParamLgl$new("keep_response", tags = c("train", "required")) + method = p_fct(levels = c("cv", "insample"), tags = c("train", "required")), + folds = p_int(lower = 2L, upper = Inf, tags = c("train", "required")), + keep_response = p_lgl(tags = c("train", "required")) )) private$.crossval_param_set$values = list(method = "cv", folds = 3, keep_response = FALSE) if (paradox_info$is_old) { diff --git a/R/PipeOpMissingIndicators.R b/R/PipeOpMissingIndicators.R index 06407b8ca..bbc584a26 100644 --- a/R/PipeOpMissingIndicators.R +++ b/R/PipeOpMissingIndicators.R @@ -79,9 +79,9 @@ PipeOpMissInd = R6Class("PipeOpMissInd", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "missind", param_vals = list()) { - ps = ParamSet$new(list( - ParamFct$new("which", levels = c("missing_train", "all"), tags = c("train", "required")), - ParamFct$new("type", levels = c("factor", "integer", "logical", "numeric"), tags = c("train", "predict", "required")) + ps = ParamSet$new(params = list( + which = p_fct(levels = c("missing_train", "all"), tags = c("train", "required")), + type = p_fct(levels = c("factor", "integer", "logical", "numeric"), tags = c("train", "predict", "required")) )) ps$values = list(which = "missing_train", type = "factor") super$initialize(id, ps, param_vals = param_vals, tags = "missings") diff --git a/R/PipeOpModelMatrix.R b/R/PipeOpModelMatrix.R index 225554783..ebfa0e222 100644 --- a/R/PipeOpModelMatrix.R +++ b/R/PipeOpModelMatrix.R @@ -60,7 +60,7 @@ PipeOpModelMatrix = R6Class("PipeOpModelMatrix", public = list( initialize = function(id = "modelmatrix", param_vals = list()) { ps = ParamSet$new(params = list( - ParamUty$new("formula", tags = c("train", "predict"), custom_check = check_formula) + formula = p_uty(tags = c("train", "predict"), custom_check = 
check_formula) )) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "stats") } diff --git a/R/PipeOpMultiplicity.R b/R/PipeOpMultiplicity.R index 78ba96053..532583aa5 100644 --- a/R/PipeOpMultiplicity.R +++ b/R/PipeOpMultiplicity.R @@ -261,7 +261,7 @@ PipeOpReplicate = R6Class("PipeOpReplicate", public = list( initialize = function(id = "replicate", param_vals = list()) { ps = ParamSet$new(params = list( - ParamInt$new("reps", lower = 1, tags = c("train", "predict", "required")) + reps = p_int(lower = 1, tags = c("train", "predict", "required")) )) ps$values = list(reps = 1) super$initialize(id, param_set = ps, param_vals = param_vals, diff --git a/R/PipeOpMutate.R b/R/PipeOpMutate.R index 2121bca7c..230f46e5f 100644 --- a/R/PipeOpMutate.R +++ b/R/PipeOpMutate.R @@ -74,8 +74,8 @@ PipeOpMutate = R6Class("PipeOpMutate", public = list( initialize = function(id = "mutate", param_vals = list()) { ps = ParamSet$new(params = list( - ParamUty$new("mutation", custom_check = check_mutation_formulae, tags = c("train", "predict", "required")), - ParamLgl$new("delete_originals", tags = c("train", "predict", "required")) + mutation = p_uty(custom_check = check_mutation_formulae, tags = c("train", "predict", "required")), + delete_originals = p_lgl(tags = c("train", "predict", "required")) )) ps$values = list(mutation = list(), delete_originals = FALSE) super$initialize(id, ps, param_vals = param_vals) diff --git a/R/PipeOpNMF.R b/R/PipeOpNMF.R index 22c425985..c8a757311 100644 --- a/R/PipeOpNMF.R +++ b/R/PipeOpNMF.R @@ -108,24 +108,24 @@ PipeOpNMF = R6Class("PipeOpNMF", public = list( initialize = function(id = "nmf", param_vals = list()) { ps = ParamSet$new(params = list( - ParamInt$new("rank", lower = 1L, upper = Inf, tags = c("train", "nmf")), - ParamFct$new("method", tags = c("train", "nmf"), + rank = p_int(lower = 1L, upper = Inf, tags = c("train", "nmf")), + method = p_fct(tags = c("train", "nmf"), levels = c("brunet", "lee", "ls-nmf", "nsNMF", "offset", "pe-nmf", "snmf/r", "snmf/l")), - ParamUty$new("seed", tags = c("train", "nmf")), + seed = p_uty(tags = c("train", "nmf")), # NOTE: rng missing, not well documented - ParamInt$new("nrun", lower = 1L, upper = Inf, default = 1L, tags = c("train", "nmf")), + nrun = p_int(lower = 1L, upper = Inf, default = 1L, tags = c("train", "nmf")), # NOTE: model missing, probably over the top here # the following are .options - ParamLgl$new("debug", default = FALSE, tags = c("train", "nmf.options")), - ParamLgl$new("keep.all", default = FALSE, tags = c("train", "nmf.options")), - ParamUty$new("parallel", default = TRUE, tags = c("train", "nmf.options")), - ParamUty$new("parallel.required", tags = c("train", "nmf.options")), - ParamLgl$new("shared.memory", tags = c("train", "nmf.options")), - ParamLgl$new("simplifyCB", default = TRUE, tags = c("train", "nmf.options")), - ParamLgl$new("track", default = FALSE, tags = c("train", "nmf.options")), - ParamUty$new("verbose", default = FALSE, tags = c("train", "nmf.options")), - ParamUty$new("pbackend", tags = c("train", "nmf")), # .pbackend - ParamUty$new("callback", tags = c("train", "nmf")) # .callback + debug = p_lgl(default = FALSE, tags = c("train", "nmf.options")), + keep.all = p_lgl(default = FALSE, tags = c("train", "nmf.options")), + parallel = p_uty(default = TRUE, tags = c("train", "nmf.options")), + parallel.required = p_uty(tags = c("train", "nmf.options")), + shared.memory = p_lgl(tags = c("train", "nmf.options")), + simplifyCB = p_lgl(default = TRUE, tags = c("train", 
"nmf.options")), + track = p_lgl(default = FALSE, tags = c("train", "nmf.options")), + verbose = p_uty(default = FALSE, tags = c("train", "nmf.options")), + pbackend = p_uty(tags = c("train", "nmf")), # .pbackend + callback = p_uty(tags = c("train", "nmf")) # .callback )) ps$add_dep("keep.all", on = "nrun", cond = CondLarger$new(1)) ps$add_dep("callback", on = "keep.all", cond = CondEqual$new(TRUE)) diff --git a/R/PipeOpPCA.R b/R/PipeOpPCA.R index 210bae161..1f9b557d3 100644 --- a/R/PipeOpPCA.R +++ b/R/PipeOpPCA.R @@ -69,9 +69,9 @@ PipeOpPCA = R6Class("PipeOpPCA", public = list( initialize = function(id = "pca", param_vals = list()) { ps = ParamSet$new(params = list( - ParamLgl$new("center", default = TRUE, tags = c("train", "pca")), - ParamLgl$new("scale.", default = FALSE, tags = c("train", "pca")), - ParamInt$new("rank.", default = NULL, lower = 1, upper = Inf, special_vals = list(NULL), tags = c("train", "pca")) + center = p_lgl(default = TRUE, tags = c("train", "pca")), + scale. = p_lgl(default = FALSE, tags = c("train", "pca")), + rank. = p_int(default = NULL, lower = 1, upper = Inf, special_vals = list(NULL), tags = c("train", "pca")) )) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpProxy.R b/R/PipeOpProxy.R index 36bf45bd3..04e40ce28 100644 --- a/R/PipeOpProxy.R +++ b/R/PipeOpProxy.R @@ -86,7 +86,7 @@ PipeOpProxy = R6Class("PipeOpProxy", # input can be a vararg input channel inname = if (innum) rep_suffix("input", innum) else "..." ps = ParamSet$new(params = list( - ParamUty$new("content", tags = c("train", "predidct", "required"), custom_check = function(x) { + content = p_uty(tags = c("train", "predidct", "required"), custom_check = function(x) { # content must be an object that can be coerced to a Graph and the output number must match tryCatch({ graph = as_graph(x) diff --git a/R/PipeOpQuantileBin.R b/R/PipeOpQuantileBin.R index a3eb6cffa..ea9f67307 100644 --- a/R/PipeOpQuantileBin.R +++ b/R/PipeOpQuantileBin.R @@ -57,7 +57,7 @@ PipeOpQuantileBin = R6Class("PipeOpQuantileBin", public = list( initialize = function(id = "quantilebin", param_vals = list()) { ps = ParamSet$new(params = list( - ParamInt$new("numsplits", lower = 2, special_vals = list(NULL), tags = "train") + numsplits = p_int(lower = 2, special_vals = list(NULL), tags = "train") )) ps$values = list(numsplits = 2L) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "stats", feature_types = c("numeric", "integer")) diff --git a/R/PipeOpRandomProjection.R b/R/PipeOpRandomProjection.R index 65e7d1fa0..5d3bf7acb 100644 --- a/R/PipeOpRandomProjection.R +++ b/R/PipeOpRandomProjection.R @@ -70,8 +70,8 @@ PipeOpRandomProjection = R6Class("PipeOpRandomProjection", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "randomprojection", param_vals = list()) { - ps = ParamSet$new(list( - ParamInt$new("rank", lower = 0, tags = "train") + ps = ParamSet$new(params = list( + rank = p_int(lower = 0, tags = "train") )) ps$values = list(rank = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) diff --git a/R/PipeOpRandomResponse.R b/R/PipeOpRandomResponse.R index c97d36d96..60630aa9f 100644 --- a/R/PipeOpRandomResponse.R +++ b/R/PipeOpRandomResponse.R @@ -84,7 +84,7 @@ PipeOpRandomResponse = R6Class("PipeOpRandomResponse", public = list( initialize = function(id = "randomresponse", param_vals = list(), packages = character(0L)) { ps = ParamSet$new(params = 
list( - ParamUty$new("rdistfun", tags = c("predict", "required"), custom_check = function(x) { + rdistfun = p_uty(tags = c("predict", "required"), custom_check = function(x) { check_function(x, args = c("n", "mean", "sd")) }) ) diff --git a/R/PipeOpRemoveConstants.R b/R/PipeOpRemoveConstants.R index 6e481777c..7b09f442d 100644 --- a/R/PipeOpRemoveConstants.R +++ b/R/PipeOpRemoveConstants.R @@ -63,11 +63,11 @@ PipeOpRemoveConstants = R6Class("PipeOpRemoveConstants", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "removeconstants", param_vals = list()) { - ps = ParamSet$new(list( - ParamDbl$new("ratio", lower = 0, upper = 1, tags = c("train", "required", "constant_check")), - ParamDbl$new("rel_tol", lower = 0, tags = c("required", "constant_check", "train")), - ParamDbl$new("abs_tol", lower = 0, tags = c("required", "constant_check", "train")), - ParamLgl$new("na_ignore", tags = c("train", "required", "constant_check")) + ps = ParamSet$new(params = list( + ratio = p_dbl(lower = 0, upper = 1, tags = c("train", "required", "constant_check")), + rel_tol = p_dbl(lower = 0, tags = c("required", "constant_check", "train")), + abs_tol = p_dbl(lower = 0, tags = c("required", "constant_check", "train")), + na_ignore = p_lgl(tags = c("train", "required", "constant_check")) )) ps$values = list(ratio = 0, rel_tol = 1e-8, abs_tol = 1e-8, na_ignore = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, tags = "robustify") diff --git a/R/PipeOpRenameColumns.R b/R/PipeOpRenameColumns.R index 80869bdc4..359557f5a 100644 --- a/R/PipeOpRenameColumns.R +++ b/R/PipeOpRenameColumns.R @@ -61,11 +61,11 @@ PipeOpRenameColumns = R6Class("PipeOpRenameColumns", public = list( initialize = function(id = "renamecolumns", param_vals = list()) { ps = ParamSet$new(params = list( - ParamUty$new("renaming", tags = c("train", "predict", "required"), custom_check = function(x) { + renaming = p_uty(tags = c("train", "predict", "required"), custom_check = function(x) { check_character(x, any.missing = FALSE, names = "strict") %check&&% check_names(x, type = "strict") }), - ParamLgl$new("ignore_missing", tags = c("train", "predict", "required")) + ignore_missing = p_lgl(tags = c("train", "predict", "required")) )) ps$values = list(renaming = character(0), ignore_missing = FALSE) super$initialize(id, ps, param_vals = param_vals, can_subset_cols = FALSE) diff --git a/R/PipeOpScale.R b/R/PipeOpScale.R index e4a0e2e34..d97ef349a 100644 --- a/R/PipeOpScale.R +++ b/R/PipeOpScale.R @@ -75,9 +75,9 @@ PipeOpScale = R6Class("PipeOpScale", public = list( initialize = function(id = "scale", param_vals = list()) { ps = ParamSet$new(params = list( - ParamLgl$new("center", default = TRUE, tags = c("train", "scale")), - ParamLgl$new("scale", default = TRUE, tags = c("train", "scale")), - ParamLgl$new("robust", tags = c("train", "required")) + center = p_lgl(default = TRUE, tags = c("train", "scale")), + scale = p_lgl(default = TRUE, tags = c("train", "scale")), + robust = p_lgl(tags = c("train", "required")) )) ps$values = list(robust = FALSE) super$initialize(id = id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) diff --git a/R/PipeOpScaleMaxAbs.R b/R/PipeOpScaleMaxAbs.R index 46b1f67dc..cf5691491 100644 --- a/R/PipeOpScaleMaxAbs.R +++ b/R/PipeOpScaleMaxAbs.R @@ -55,7 +55,7 @@ PipeOpScaleMaxAbs = R6Class("PipeOpScaleMaxAbs", public = list( initialize = function(id = "scalemaxabs", param_vals = list()) { ps = ParamSet$new(params = list( - ParamDbl$new("maxabs", lower 
= 0, tags = c("required", "train", "predict")) + maxabs = p_dbl(lower = 0, tags = c("required", "train", "predict")) )) ps$values = list(maxabs = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) diff --git a/R/PipeOpScaleRange.R b/R/PipeOpScaleRange.R index 6a455c20e..8c9d583e2 100644 --- a/R/PipeOpScaleRange.R +++ b/R/PipeOpScaleRange.R @@ -60,8 +60,8 @@ PipeOpScaleRange = R6Class("PipeOpScaleRange", public = list( initialize = function(id = "scalerange", param_vals = list()) { ps = ParamSet$new(params = list( - ParamDbl$new("lower", tags = c("required", "train")), - ParamDbl$new("upper", tags = c("required", "train")) + lower = p_dbl(tags = c("required", "train")), + upper = p_dbl(tags = c("required", "train")) )) ps$values = list(lower = 0, upper = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) diff --git a/R/PipeOpSelect.R b/R/PipeOpSelect.R index 57f777e09..ec4e1b4b5 100644 --- a/R/PipeOpSelect.R +++ b/R/PipeOpSelect.R @@ -70,7 +70,7 @@ PipeOpSelect = R6Class("PipeOpSelect", public = list( initialize = function(id = "select", param_vals = list()) { ps = ParamSet$new(params = list( - ParamUty$new("selector", custom_check = check_function, tags = c("train", "required")) + selector = p_uty(custom_check = check_function, tags = c("train", "required")) )) ps$values = list(selector = selector_all()) super$initialize(id, ps, param_vals = param_vals, tags = "feature selection") diff --git a/R/PipeOpSmote.R b/R/PipeOpSmote.R index e9f1e2c01..9e7e0e8e7 100644 --- a/R/PipeOpSmote.R +++ b/R/PipeOpSmote.R @@ -72,10 +72,10 @@ PipeOpSmote = R6Class("PipeOpSmote", public = list( initialize = function(id = "smote", param_vals = list()) { ps = ParamSet$new(params = list( - ParamInt$new("K", lower = 1, default = 5, tags = c("train", "smote")), + K = p_int(lower = 1, default = 5, tags = c("train", "smote")), # dup_size = 0 leads to behaviour different from 1, 2, 3, ..., because it means "autodetect", # so it is a 'special_vals'. 
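# Aside, illustrating the 'special_vals' comment above: paradox accepts a value
# listed in special_vals even when it falls outside the declared range. A
# minimal sketch, assuming the paradox ps()/p_int() constructors and the
# checkmate-style $test() method; illustrative only, not taken from the sources.
library(paradox)
pset = ps(dup_size = p_int(lower = 1, special_vals = list(0)))
pset$test(list(dup_size = 0))   # TRUE: 0 is admitted as the "autodetect" marker
pset$test(list(dup_size = 5))   # TRUE: inside the regular range [1, Inf)
pset$test(list(dup_size = -1))  # FALSE: out of range and not a special value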
- ParamInt$new("dup_size", lower = 1, default = 0, special_vals = list(0), tags = c("train", "smote")) + dup_size = p_int(lower = 1, default = 0, special_vals = list(0), tags = c("train", "smote")) )) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "smotefamily", can_subset_cols = FALSE, tags = "imbalanced data") diff --git a/R/PipeOpSpatialSign.R b/R/PipeOpSpatialSign.R index 9ef2a1ade..464b74e68 100644 --- a/R/PipeOpSpatialSign.R +++ b/R/PipeOpSpatialSign.R @@ -56,8 +56,8 @@ PipeOpSpatialSign = R6Class("PipeOpSpatialSign", public = list( initialize = function(id = "spatialsign", param_vals = list()) { ps = ParamSet$new(params = list( - ParamDbl$new("length", tags = c("train", "predict"), lower = 0), - ParamDbl$new("norm", tags = c("train", "predict"), lower = 0) + length = p_dbl(tags = c("train", "predict"), lower = 0), + norm = p_dbl(tags = c("train", "predict"), lower = 0) )) ps$values = list(norm = 2, length = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) diff --git a/R/PipeOpSubsample.R b/R/PipeOpSubsample.R index 38e1fb5d7..881391545 100644 --- a/R/PipeOpSubsample.R +++ b/R/PipeOpSubsample.R @@ -65,9 +65,9 @@ PipeOpSubsample = R6Class("PipeOpSubsample", public = list( initialize = function(id = "subsample", param_vals = list()) { ps = ParamSet$new(params = list( - ParamDbl$new("frac", lower = 0, upper = Inf, tags = "train"), - ParamLgl$new("stratify", tags = "train"), - ParamLgl$new("replace", tags = "train") + frac = p_dbl(lower = 0, upper = Inf, tags = "train"), + stratify = p_lgl(tags = "train"), + replace = p_lgl(tags = "train") )) ps$values = list(frac = 1 - exp(-1), stratify = FALSE, replace = FALSE) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE) diff --git a/R/PipeOpTaskPreproc.R b/R/PipeOpTaskPreproc.R index 3bff13f46..c87517644 100644 --- a/R/PipeOpTaskPreproc.R +++ b/R/PipeOpTaskPreproc.R @@ -175,7 +175,7 @@ PipeOpTaskPreproc = R6Class("PipeOpTaskPreproc", if (inherits(param_set, "ParamSet")) { param_set$add(acp) } else { - private$.affectcols_ps = ParamSet$new(list(acp)) + private$.affectcols_ps = ParamSet$new(params = list(acp)) param_set = c(param_set, alist(private$.affectcols_ps)) } } diff --git a/R/PipeOpTextVectorizer.R b/R/PipeOpTextVectorizer.R index 4d57c4b76..eb3d4d4d1 100644 --- a/R/PipeOpTextVectorizer.R +++ b/R/PipeOpTextVectorizer.R @@ -166,49 +166,49 @@ PipeOpTextVectorizer = R6Class("PipeOpTextVectorizer", public = list( initialize = function(id = "textvectorizer", param_vals = list()) { ps = ParamSet$new(params = list( - ParamFct$new("stopwords_language", tags = c("train", "predict"), + stopwords_language = p_fct(tags = c("train", "predict"), levels = c("da", "de", "en", "es", "fi", "fr", "hu", "ir", "it", "nl", "no", "pt", "ro", "ru", "sv" , "smart", "none")), - ParamUty$new("extra_stopwords", tags = c("train", "predict"), custom_check = check_character), + extra_stopwords = p_uty(tags = c("train", "predict"), custom_check = check_character), - ParamLgl$new("tolower", default = TRUE, tags = c("train", "predict", "dfm")), - ParamLgl$new("stem", default = FALSE, tags = c("train", "predict", "dfm")), + tolower = p_lgl(default = TRUE, tags = c("train", "predict", "dfm")), + stem = p_lgl(default = FALSE, tags = c("train", "predict", "dfm")), - ParamFct$new("what", default = "word", tags = c("train", "predict", "tokenizer"), + what = p_fct(default = "word", tags = c("train", "predict", "tokenizer"), levels = c("word", "word1", 
"fasterword", "fastestword", "character", "sentence")), - ParamLgl$new("remove_punct", default = FALSE, tags = c("train", "predict", "tokenizer")), - ParamLgl$new("remove_symbols", default = FALSE, tags = c("train", "predict", "tokenizer")), - ParamLgl$new("remove_numbers", default = FALSE, tags = c("train", "predict", "tokenizer")), - ParamLgl$new("remove_url", default = FALSE, tags = c("train", "predict", "tokenizer")), - ParamLgl$new("remove_separators", default = TRUE, tags = c("train", "predict", "tokenizer")), - ParamLgl$new("split_hyphens", default = FALSE, tags = c("train", "predict", "tokenizer")), + remove_punct = p_lgl(default = FALSE, tags = c("train", "predict", "tokenizer")), + remove_symbols = p_lgl(default = FALSE, tags = c("train", "predict", "tokenizer")), + remove_numbers = p_lgl(default = FALSE, tags = c("train", "predict", "tokenizer")), + remove_url = p_lgl(default = FALSE, tags = c("train", "predict", "tokenizer")), + remove_separators = p_lgl(default = TRUE, tags = c("train", "predict", "tokenizer")), + split_hyphens = p_lgl(default = FALSE, tags = c("train", "predict", "tokenizer")), - ParamUty$new("n", default = 2, tags = c("train", "predict", "ngrams"), custom_check = curry(check_integerish, min.len = 1, lower = 1, any.missing = FALSE)), - ParamUty$new("skip", default = 0, tags = c("train", "predict", "ngrams"), custom_check = curry(check_integerish, min.len = 1, lower = 0, any.missing = FALSE)), + n = p_uty(default = 2, tags = c("train", "predict", "ngrams"), custom_check = curry(check_integerish, min.len = 1, lower = 1, any.missing = FALSE)), + skip = p_uty(default = 0, tags = c("train", "predict", "ngrams"), custom_check = curry(check_integerish, min.len = 1, lower = 0, any.missing = FALSE)), - ParamDbl$new("sparsity", lower = 0, upper = 1, default = NULL, + sparsity = p_dbl(lower = 0, upper = 1, default = NULL, tags = c("train", "dfm_trim"), special_vals = list(NULL)), - ParamFct$new("termfreq_type", default = "count", tags = c("train", "dfm_trim"), + termfreq_type = p_fct(default = "count", tags = c("train", "dfm_trim"), levels = c("count", "prop", "rank", "quantile")), - ParamDbl$new("min_termfreq", lower = 0, default = NULL, + min_termfreq = p_dbl(lower = 0, default = NULL, tags = c("train", "dfm_trim"), special_vals = list(NULL)), - ParamDbl$new("max_termfreq", lower = 0, default = NULL, + max_termfreq = p_dbl(lower = 0, default = NULL, tags = c("train", "dfm_trim"), special_vals = list(NULL)), - ParamFct$new("scheme_df", default = "count", tags = c("train", "docfreq"), + scheme_df = p_fct(default = "count", tags = c("train", "docfreq"), levels = c("count", "inverse", "inversemax", "inverseprob", "unary")), - ParamDbl$new("smoothing_df", lower = 0, default = 0, tags = c("train", "docfreq")), - ParamDbl$new("k_df", lower = 0, tags = c("train", "docfreq")), - ParamDbl$new("threshold_df", lower = 0, default = 0, tags = c("train", "docfreq")), - ParamDbl$new("base_df", lower = 0, default = 10, tags = c("train", "docfreq")), + smoothing_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq")), + k_df = p_dbl(lower = 0, tags = c("train", "docfreq")), + threshold_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq")), + base_df = p_dbl(lower = 0, default = 10, tags = c("train", "docfreq")), - ParamFct$new("scheme_tf", default = "count", tags = c("train", "predict", "dfm_weight"), + scheme_tf = p_fct(default = "count", tags = c("train", "predict", "dfm_weight"), levels = c("count", "prop", "propmax", "logcount", "boolean", "augmented", 
"logave")), - ParamDbl$new("k_tf", lower = 0, upper = 1, tags = c("train", "predict", "dfm_weight")), - ParamDbl$new("base_tf", lower = 0, default = 10, tags = c("train", "predict", "dfm_weight")), + k_tf = p_dbl(lower = 0, upper = 1, tags = c("train", "predict", "dfm_weight")), + base_tf = p_dbl(lower = 0, default = 10, tags = c("train", "predict", "dfm_weight")), - ParamFct$new("return_type", levels = c("bow", "integer_sequence", "factor_sequence"), tags = c("train", "predict")), - ParamInt$new("sequence_length", default = 0, lower = 0, upper = Inf, tags = c("train", "predict", "integer_sequence")) + return_type = p_fct(levels = c("bow", "integer_sequence", "factor_sequence"), tags = c("train", "predict")), + sequence_length = p_int(default = 0, lower = 0, upper = Inf, tags = c("train", "predict", "integer_sequence")) ))$ add_dep("base_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ add_dep("smoothing_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ diff --git a/R/PipeOpTrafo.R b/R/PipeOpTrafo.R index 54365b3db..c25ff95e0 100644 --- a/R/PipeOpTrafo.R +++ b/R/PipeOpTrafo.R @@ -348,9 +348,9 @@ PipeOpTargetMutate = R6Class("PipeOpTargetMutate", public = list( initialize = function(id = "targetmutate", param_vals = list(), new_task_type = NULL) { private$.new_task_type = assert_choice(new_task_type, mlr_reflections$task_types$type, null.ok = TRUE) - ps = ParamSet$new(list( - ParamUty$new("trafo", tags = c("train", "predict"), custom_check = function(x) check_function(x, nargs = 1L)), - ParamUty$new("inverter", tags = "predict", custom_check = function(x) check_function(x, nargs = 1L)) + ps = ParamSet$new(params = list( + trafo = p_uty(tags = c("train", "predict"), custom_check = function(x) check_function(x, nargs = 1L)), + inverter = p_uty(tags = "predict", custom_check = function(x) check_function(x, nargs = 1L)) )) # We could add a condition here for new_task_type on trafo and inverter when mlr-org/paradox#278 has an answer. 
# HOWEVER conditions are broken in paradox, it is a terrible idea to use them in PipeOps, @@ -458,8 +458,8 @@ PipeOpTargetTrafoScaleRange = R6Class("PipeOpTargetTrafoScaleRange", public = list( initialize = function(id = "targettrafoscalerange", param_vals = list()) { ps = ParamSet$new(params = list( - ParamDbl$new("lower", tags = c("required", "train")), - ParamDbl$new("upper", tags = c("required", "train")) + lower = p_dbl(tags = c("required", "train")), + upper = p_dbl(tags = c("required", "train")) )) ps$values = list(lower = 0, upper = 1) super$initialize(id = id, param_set = ps, param_vals = param_vals, task_type_in = "TaskRegr") @@ -566,10 +566,10 @@ PipeOpUpdateTarget = R6Class("PipeOpUpdateTarget", public = list( initialize = function(id = "update_target", param_vals = list()) { ps = ParamSet$new(params = list( - ParamUty$new("trafo", tags = c("train", "predict"), custom_check = function(x) check_function(x, nargs = 1L)), - ParamUty$new("new_target_name", tags = c("train", "predict"), custom_check = function(x) check_character(x, any.missing = FALSE, len = 1L)), - ParamUty$new("new_task_type", tags = c("train", "predict"), custom_check = function(x) check_choice(x, choices = mlr_reflections$task_types$type)), - ParamLgl$new("drop_original_target", tags = c("train", "predict")) + trafo = p_uty(tags = c("train", "predict"), custom_check = function(x) check_function(x, nargs = 1L)), + new_target_name = p_uty(tags = c("train", "predict"), custom_check = function(x) check_character(x, any.missing = FALSE, len = 1L)), + new_task_type = p_uty(tags = c("train", "predict"), custom_check = function(x) check_choice(x, choices = mlr_reflections$task_types$type)), + drop_original_target = p_lgl(tags = c("train", "predict")) ) ) ps$values = list(trafo = identity, drop_original_target = TRUE) diff --git a/R/PipeOpTuneThreshold.R b/R/PipeOpTuneThreshold.R index 311f5f433..47a182e16 100644 --- a/R/PipeOpTuneThreshold.R +++ b/R/PipeOpTuneThreshold.R @@ -76,9 +76,9 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold", public = list( initialize = function(id = "tunethreshold", param_vals = list()) { ps = ParamSet$new(params = list( - ParamUty$new("measure", custom_check = check_class_or_character("Measure", mlr_measures), tags = "train"), - ParamUty$new("optimizer", custom_check = check_optimizer, tags = "train"), - ParamUty$new("log_level", tags = "train", + measure = p_uty(custom_check = check_class_or_character("Measure", mlr_measures), tags = "train"), + optimizer = p_uty(custom_check = check_optimizer, tags = "train"), + log_level = p_uty(tags = "train", function(x) check_string(x) %check||% check_integerish(x)) )) ps$values = list(measure = "classif.ce", optimizer = "gensa", log_level = "warn") @@ -120,7 +120,7 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold", ps = private$.make_param_set(pred) measure = self$param_set$values$measure if (is.character(measure)) measure = msr(measure) else measure - codomain = ParamSet$new(list(ParamDbl$new(id = measure$id, tags = ifelse(measure$minimize, "minimize", "maximize")))) + codomain = ParamSet$new(params = list(ParamDbl$new(id = measure$id, tags = ifelse(measure$minimize, "minimize", "maximize")))) objfun = bbotk::ObjectiveRFun$new( fun = function(xs) private$.objfun(xs, pred = pred, measure = measure), domain = ps, codomain = codomain diff --git a/R/PipeOpVtreat.R b/R/PipeOpVtreat.R index acb46899e..abc8cdff5 100644 --- a/R/PipeOpVtreat.R +++ b/R/PipeOpVtreat.R @@ -129,35 +129,35 @@ PipeOpVtreat = R6Class("PipeOpVtreat", public = list( 
initialize = function(id = "vtreat", param_vals = list()) { ps = ParamSet$new(params = list( - ParamLgl$new("recommended", tags = c("train", "predict")), - ParamUty$new("cols_to_copy", custom_check = checkmate::check_function, tags = c("train", "predict")), + recommended = p_lgl(tags = c("train", "predict")), + cols_to_copy = p_uty(custom_check = checkmate::check_function, tags = c("train", "predict")), # tags stand for: regression vtreat::regression_parameters() / classification vtreat::classification_parameters() / multinomial vtreat::multinomial_parameters() - ParamDbl$new("minFraction", lower = 0, upper = 1, default = 0.02, tags = c("train", "regression", "classification", "multinomial")), - ParamDbl$new("smFactor", lower = 0, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), - ParamInt$new("rareCount", lower = 0L, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), - ParamDbl$new("rareSig", lower = 0, upper = 1, special_vals = list(NULL), tags = c("train", "regression", "classification", "multinomial")), # default NULL for regression, classification, 1 for multinomial - ParamDbl$new("collarProb", lower = 0, upper = 1, default = 0, tags = c("train", "regression", "classification", "multinomial")), - ParamLgl$new("doCollar", default = FALSE, tags = c("train", "regression", "classification", "multinomial")), - ParamUty$new("codeRestriction", default = NULL, custom_check = function(x) checkmate::check_character(x, any.missing = FALSE, null.ok = TRUE), + minFraction = p_dbl(lower = 0, upper = 1, default = 0.02, tags = c("train", "regression", "classification", "multinomial")), + smFactor = p_dbl(lower = 0, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), + rareCount = p_int(lower = 0L, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), + rareSig = p_dbl(lower = 0, upper = 1, special_vals = list(NULL), tags = c("train", "regression", "classification", "multinomial")), # default NULL for regression, classification, 1 for multinomial + collarProb = p_dbl(lower = 0, upper = 1, default = 0, tags = c("train", "regression", "classification", "multinomial")), + doCollar = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), + codeRestriction = p_uty(default = NULL, custom_check = function(x) checkmate::check_character(x, any.missing = FALSE, null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), - ParamUty$new("customCoders", default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), - ParamUty$new("splitFunction", default = NULL, custom_check = function(x) checkmate::check_function(x, args = c("nSplits", "nRows", "dframe", "y"), null.ok = TRUE), + customCoders = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), + splitFunction = p_uty(default = NULL, custom_check = function(x) checkmate::check_function(x, args = c("nSplits", "nRows", "dframe", "y"), null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), - ParamInt$new("ncross", lower = 2L, upper = Inf, default = 3L, tags = c("train", "regression", "classification", "multinomial")), - ParamLgl$new("forceSplit", default = FALSE, tags = c("train", "regression", "classification", 
"multinomial")), - ParamLgl$new("catScaling", tags = c("train", "regression", "classification", "multinomial")), # default TRUE for regression, classification, FALSE for multinomial - ParamLgl$new("verbose", default = FALSE, tags = c("train", "regression", "classification", "multinomial")), - ParamLgl$new("use_paralell", default = TRUE, tags = c("train", "regression", "classification", "multinomial")), - ParamUty$new("missingness_imputation", default = NULL, custom_check = function(x) checkmate::check_function(x, args = c("values", "weights"), null.ok = TRUE), + ncross = p_int(lower = 2L, upper = Inf, default = 3L, tags = c("train", "regression", "classification", "multinomial")), + forceSplit = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), + catScaling = p_lgl(tags = c("train", "regression", "classification", "multinomial")), # default TRUE for regression, classification, FALSE for multinomial + verbose = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), + use_paralell = p_lgl(default = TRUE, tags = c("train", "regression", "classification", "multinomial")), + missingness_imputation = p_uty(default = NULL, custom_check = function(x) checkmate::check_function(x, args = c("values", "weights"), null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), - ParamDbl$new("pruneSig", lower = 0, upper = 1, special_vals = list(NULL), default = NULL, tags = c("train", "regression", "classification")), - ParamLgl$new("scale", default = FALSE, tags = c("train", "regression", "classification", "multinomial")), - ParamUty$new("varRestriction", default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification")), - ParamUty$new("trackedValues", default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification")), + pruneSig = p_dbl(lower = 0, upper = 1, special_vals = list(NULL), default = NULL, tags = c("train", "regression", "classification")), + scale = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), + varRestriction = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification")), + trackedValues = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification")), # NOTE: check_for_duplicate_frames not needed - ParamUty$new("y_dependent_treatments", default = "catB", custom_check = function(x) checkmate::check_character(x, any.missing = FALSE), tags = c("train", "multinomial")), + y_dependent_treatments = p_uty(default = "catB", custom_check = function(x) checkmate::check_character(x, any.missing = FALSE), tags = c("train", "multinomial")), # NOTE: imputation_map is also in multinomial_parameters(); this is redundant so only include it here - ParamUty$new("imputation_map", default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "predict")) + imputation_map = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "predict")) # NOTE: parallelCluster missing intentionally and will be set to NULL )) ps$add_dep("collarProb", on = "doCollar", cond = CondEqual$new(TRUE)) diff --git a/R/PipeOpYeoJohnson.R b/R/PipeOpYeoJohnson.R index 97923840d..4b1727ad5 100644 --- 
a/R/PipeOpYeoJohnson.R +++ b/R/PipeOpYeoJohnson.R @@ -68,10 +68,10 @@ PipeOpYeoJohnson = R6Class("PipeOpYeoJohnson", public = list( initialize = function(id = "yeojohnson", param_vals = list()) { ps = ParamSet$new(params = list( - ParamDbl$new("eps", default = 0.001, lower = 0, tags = c("train", "yj")), - ParamLgl$new("standardize", default = TRUE, tags = c("train", "yj")), - ParamDbl$new("lower", tags = c("train", "yj")), - ParamDbl$new("upper", tags = c("train", "yj")) + eps = p_dbl(default = 0.001, lower = 0, tags = c("train", "yj")), + standardize = p_lgl(default = TRUE, tags = c("train", "yj")), + lower = p_dbl(tags = c("train", "yj")), + upper = p_dbl(tags = c("train", "yj")) )) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "bestNormalize", feature_types = c("numeric", "integer")) From bae8581b8cf1e00d22a5c6f53ad339d6d6f31dca Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 13 Jan 2024 23:18:25 +0100 Subject: [PATCH 12/46] ps() instead of ParamSet() --- R/LearnerAvg.R | 8 ++++---- R/PipeOpBoxCox.R | 4 ++-- R/PipeOpChunk.R | 4 ++-- R/PipeOpClassBalancing.R | 4 ++-- R/PipeOpClassWeights.R | 4 ++-- R/PipeOpColApply.R | 4 ++-- R/PipeOpColRoles.R | 4 ++-- R/PipeOpCollapseFactors.R | 4 ++-- R/PipeOpDateFeatures.R | 4 ++-- R/PipeOpEncode.R | 4 ++-- R/PipeOpEncodeImpact.R | 4 ++-- R/PipeOpEncodeLmer.R | 4 ++-- R/PipeOpFilter.R | 4 ++-- R/PipeOpFixFactors.R | 4 ++-- R/PipeOpHistBin.R | 4 ++-- R/PipeOpICA.R | 4 ++-- R/PipeOpImputeConstant.R | 4 ++-- R/PipeOpImputeOOR.R | 2 +- R/PipeOpKernelPCA.R | 4 ++-- R/PipeOpLearnerCV.R | 4 ++-- R/PipeOpMissingIndicators.R | 4 ++-- R/PipeOpModelMatrix.R | 4 ++-- R/PipeOpMultiplicity.R | 4 ++-- R/PipeOpMutate.R | 4 ++-- R/PipeOpNMF.R | 4 ++-- R/PipeOpPCA.R | 4 ++-- R/PipeOpProxy.R | 4 ++-- R/PipeOpQuantileBin.R | 4 ++-- R/PipeOpRandomProjection.R | 4 ++-- R/PipeOpRandomResponse.R | 2 +- R/PipeOpRemoveConstants.R | 4 ++-- R/PipeOpRenameColumns.R | 4 ++-- R/PipeOpScale.R | 4 ++-- R/PipeOpScaleMaxAbs.R | 4 ++-- R/PipeOpScaleRange.R | 4 ++-- R/PipeOpSelect.R | 4 ++-- R/PipeOpSmote.R | 4 ++-- R/PipeOpSpatialSign.R | 4 ++-- R/PipeOpSubsample.R | 4 ++-- R/PipeOpTextVectorizer.R | 2 +- R/PipeOpTrafo.R | 10 +++++----- R/PipeOpTuneThreshold.R | 4 ++-- R/PipeOpVtreat.R | 4 ++-- R/PipeOpYeoJohnson.R | 4 ++-- 44 files changed, 90 insertions(+), 90 deletions(-) diff --git a/R/LearnerAvg.R b/R/LearnerAvg.R index 687c36492..727da1f86 100644 --- a/R/LearnerAvg.R +++ b/R/LearnerAvg.R @@ -58,12 +58,12 @@ LearnerClassifAvg = R6Class("LearnerClassifAvg", inherit = LearnerClassif, public = list( initialize = function(id = "classif.avg") { - ps = ParamSet$new(params = list( + ps = ps( measure = p_uty(custom_check = check_class_or_character("MeasureClassif", mlr_measures), tags = "train"), optimizer = p_uty(custom_check = check_optimizer, tags = "train"), log_level = p_uty(tags = "train", function(x) check_string(x) %check||% check_integerish(x)) - )) + ) ps$values = list(measure = "classif.ce", optimizer = "nloptr", log_level = "warn") super$initialize( id = id, @@ -132,12 +132,12 @@ LearnerClassifAvg = R6Class("LearnerClassifAvg", inherit = LearnerClassif, LearnerRegrAvg = R6Class("LearnerRegrAvg", inherit = LearnerRegr, public = list( initialize = function(id = "regr.avg") { - ps = ParamSet$new(params = list( + ps = ps( measure = p_uty(custom_check = check_class_or_character("MeasureRegr", mlr_measures), tags = "train"), optimizer = p_uty(custom_check = check_optimizer, tags = "train"), log_level = p_uty(tags = "train", function(x) check_string(x) 
%check||% check_integerish(x)) - )) + ) ps$values = list(measure = "regr.mse", optimizer = "nloptr", log_level = "warn") super$initialize( id = id, diff --git a/R/PipeOpBoxCox.R b/R/PipeOpBoxCox.R index 24d140ec4..43924c7f2 100644 --- a/R/PipeOpBoxCox.R +++ b/R/PipeOpBoxCox.R @@ -65,12 +65,12 @@ PipeOpBoxCox = R6Class("PipeOpBoxCox", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "boxcox", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( standardize = p_lgl(default = TRUE, tags = c("train", "boxcox")), eps = p_dbl(default = 0.001, lower = 0, tags = c("train", "boxcox")), lower = p_dbl(tags = c("train", "boxcox")), upper = p_dbl(tags = c("train", "boxcox")) - )) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "bestNormalize", feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpChunk.R b/R/PipeOpChunk.R index 8bb7543ba..03aacce79 100644 --- a/R/PipeOpChunk.R +++ b/R/PipeOpChunk.R @@ -64,9 +64,9 @@ PipeOpChunk = R6Class("PipeOpChunk", public = list( initialize = function(outnum, id = "chunk", param_vals = list()) { outnum = assert_int(outnum, lower = 1L) - ps = ParamSet$new(params = list( + ps = ps( shuffle = p_lgl(tags = "train") - )) + ) ps$values = list(shuffle = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, diff --git a/R/PipeOpClassBalancing.R b/R/PipeOpClassBalancing.R index 20232e34e..bbb63faeb 100644 --- a/R/PipeOpClassBalancing.R +++ b/R/PipeOpClassBalancing.R @@ -104,14 +104,14 @@ PipeOpClassBalancing = R6Class("PipeOpClassBalancing", public = list( initialize = function(id = "classbalancing", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( ratio = p_dbl(lower = 0, upper = Inf, tags = "train"), ParamFct$new("reference", levels = c("all", "major", "minor", "nonmajor", "nonminor", "one"), tags = "train"), ParamFct$new("adjust", levels = c("all", "major", "minor", "nonmajor", "nonminor", "upsample", "downsample"), tags = "train"), shuffle = p_lgl(tags = "train") - )) + ) ps$values = list(ratio = 1, reference = "all", adjust = "all", shuffle = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data") } diff --git a/R/PipeOpClassWeights.R b/R/PipeOpClassWeights.R index ce572d037..9764b3535 100644 --- a/R/PipeOpClassWeights.R +++ b/R/PipeOpClassWeights.R @@ -71,9 +71,9 @@ PipeOpClassWeights = R6Class("PipeOpClassWeights", public = list( initialize = function(id = "classweights", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( minor_weight = p_dbl(lower = 0, upper = Inf, tags = "train") - )) + ) ps$values = list(minor_weight = 1) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data") } diff --git a/R/PipeOpColApply.R b/R/PipeOpColApply.R index 699c71fca..da1f78f94 100644 --- a/R/PipeOpColApply.R +++ b/R/PipeOpColApply.R @@ -92,9 +92,9 @@ PipeOpColApply = R6Class("PipeOpColApply", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "colapply", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( applicator = p_uty(custom_check = check_function, tags = c("train", "predict")) - )) + ) ps$values = list(applicator = identity) super$initialize(id, ps, param_vals = param_vals) } diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index f7bd0bcef..606572854 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -56,7 +56,7 @@ 
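# Aside, illustrating the conversion pattern of this commit ("ps() instead of
# ParamSet()"): the ps()/p_dbl()/p_lgl() shorthand constructs the same ParamSet
# as the older ParamSet$new(params = list(...)) calls being replaced above. A
# minimal standalone sketch, assuming only the paradox constructors already used
# in the diff; the parameter names here are made up for illustration.
library(paradox)
pset = ps(
  ratio   = p_dbl(lower = 0, upper = 1, tags = "train"),
  shuffle = p_lgl(tags = "train")
)
pset$values = list(ratio = 1, shuffle = TRUE)  # values are assigned exactly as before
pset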
PipeOpColRoles = R6Class("PipeOpColRoles", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "colroles", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( # named list, each entry with a vector of roles new_role = p_uty(tags = c("train", "predict"), custom_check = function(x) { first_check = check_list(x, types = "character", any.missing = FALSE, min.len = 1L, names = "named") @@ -69,7 +69,7 @@ PipeOpColRoles = R6Class("PipeOpColRoles", all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) check_subset(unlist(x), all_col_roles[all_col_roles != "target"]) }) - )) + ) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE) } ), diff --git a/R/PipeOpCollapseFactors.R b/R/PipeOpCollapseFactors.R index f11887bc2..7d9532e8e 100644 --- a/R/PipeOpCollapseFactors.R +++ b/R/PipeOpCollapseFactors.R @@ -59,10 +59,10 @@ PipeOpCollapseFactors = R6Class("PipeOpCollapseFactors", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "collapsefactors", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( no_collapse_above_prevalence = p_dbl(0, 1, tags = c("train", "predict")), target_level_count = p_int(2, tags = c("train", "predict")) - )) + ) ps$values = list(no_collapse_above_prevalence = 1, target_level_count = 2) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("factor", "ordered")) } diff --git a/R/PipeOpDateFeatures.R b/R/PipeOpDateFeatures.R index 93de36c38..d9908265f 100644 --- a/R/PipeOpDateFeatures.R +++ b/R/PipeOpDateFeatures.R @@ -98,7 +98,7 @@ PipeOpDateFeatures = R6Class("PipeOpDateFeatures", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "datefeatures", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( keep_date_var = p_lgl(tags = c("train", "predict", "required")), cyclic = p_lgl(tags = c("train", "predict", "required")), year = p_lgl(tags = c("train", "predict", "datepart", "required")), @@ -111,7 +111,7 @@ PipeOpDateFeatures = R6Class("PipeOpDateFeatures", minute = p_lgl(tags = c("train", "predict", "datepart", "required")), second = p_lgl(tags = c("train", "predict", "datepart", "required")), is_day = p_lgl(tags = c("train", "predict", "datepart", "required")) - )) + ) ps$values = list(keep_date_var = FALSE, cyclic = FALSE, year = TRUE, month = TRUE, week_of_year = TRUE, day_of_year = TRUE, day_of_month = TRUE, day_of_week = TRUE, hour = TRUE, minute = TRUE, second = TRUE, is_day = TRUE) diff --git a/R/PipeOpEncode.R b/R/PipeOpEncode.R index df77bae4d..35e630525 100644 --- a/R/PipeOpEncode.R +++ b/R/PipeOpEncode.R @@ -82,9 +82,9 @@ PipeOpEncode = R6Class("PipeOpEncode", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "encode", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( method = p_fct(levels = c("one-hot", "treatment", "helmert", "poly", "sum"), tags = c("train", "predict")) - )) + ) ps$values = list(method = "one-hot") super$initialize(id, param_set = ps, param_vals = param_vals, packages = "stats", tags = "encode", feature_types = c("factor", "ordered")) } diff --git a/R/PipeOpEncodeImpact.R b/R/PipeOpEncodeImpact.R index d719b150d..5de079907 100644 --- a/R/PipeOpEncodeImpact.R +++ b/R/PipeOpEncodeImpact.R @@ -77,10 +77,10 @@ PipeOpEncodeImpact = R6Class("PipeOpEncodeImpact", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "encodeimpact", param_vals = list()) { - ps = ParamSet$new(params = list( + 
ps = ps( smoothing = p_dbl(0, Inf, tags = c("train", "required")), impute_zero = p_lgl(tags = c("train", "required")) - )) + ) ps$values = list(smoothing = 1e-4, impute_zero = FALSE) super$initialize(id, param_set = ps, param_vals = param_vals, tags = "encode", feature_types = c("factor", "ordered")) } diff --git a/R/PipeOpEncodeLmer.R b/R/PipeOpEncodeLmer.R index be834ed97..abdb031b0 100644 --- a/R/PipeOpEncodeLmer.R +++ b/R/PipeOpEncodeLmer.R @@ -88,9 +88,9 @@ PipeOpEncodeLmer = R6Class("PipeOpEncodeLmer", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "encodelmer", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( fast_optim = p_lgl(tags = c("train", "required")) - )) + ) ps$values = list(fast_optim = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, packages = c("lme4", "nloptr"), tags = "encode", feature_types = c("factor", "ordered")) } diff --git a/R/PipeOpFilter.R b/R/PipeOpFilter.R index 1a87f85f8..1049bab3a 100644 --- a/R/PipeOpFilter.R +++ b/R/PipeOpFilter.R @@ -122,12 +122,12 @@ PipeOpFilter = R6Class("PipeOpFilter", self$filter$param_set$tags[[pn]] = union(self$filter$param_set$tags[[pn]] , "train") } } - private$.outer_param_set = ParamSet$new(params = list( + private$.outer_param_set = ps( nfeat = p_int(lower = 0, tags = "train"), frac = p_dbl(lower = 0, upper = 1, tags = "train"), cutoff = p_dbl(tags = "train"), permuted = p_int(lower = 1, tags = "train") - )) + ) if (paradox_info$is_old) { private$.outer_param_set$set_id = "filter" } diff --git a/R/PipeOpFixFactors.R b/R/PipeOpFixFactors.R index e5e20116e..57f3dfeff 100644 --- a/R/PipeOpFixFactors.R +++ b/R/PipeOpFixFactors.R @@ -51,9 +51,9 @@ PipeOpFixFactors = R6Class("PipeOpFixFactors", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "fixfactors", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( droplevels = p_lgl(tags = c("train", "predict")) - )) + ) ps$values = list(droplevels = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, tags = "robustify", feature_types = c("factor", "ordered")) } diff --git a/R/PipeOpHistBin.R b/R/PipeOpHistBin.R index a5c96d548..921145826 100644 --- a/R/PipeOpHistBin.R +++ b/R/PipeOpHistBin.R @@ -64,9 +64,9 @@ PipeOpHistBin = R6Class("PipeOpHistBin", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "histbin", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( breaks = p_uty(default = "Sturges", tags = c("train", "hist")) - )) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "graphics", feature_types = c("numeric", "integer")) } ), diff --git a/R/PipeOpICA.R b/R/PipeOpICA.R index 37f980782..9f363ed3e 100644 --- a/R/PipeOpICA.R +++ b/R/PipeOpICA.R @@ -90,7 +90,7 @@ PipeOpICA = R6Class("PipeOpICA", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "ica", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( n.comp = p_int(lower = 1, upper = Inf, tags = c("train", "ica")), alg.typ = p_fct(levels = c("parallel", "deflation"), default = "parallel", tags = c("train", "ica")), @@ -102,7 +102,7 @@ PipeOpICA = R6Class("PipeOpICA", tol = p_dbl(default = 1e-04, lower = 0, tags = c("train", "ica")), verbose = p_lgl(default = FALSE, tags = c("train", "ica")), w.init = p_uty(default = NULL, tags = c("train", "ica")) - )) + ) ps$values = list(method = "C") super$initialize(id, param_set = ps, param_vals = param_vals, packages = "fastICA", feature_types = c("numeric", 
"integer")) diff --git a/R/PipeOpImputeConstant.R b/R/PipeOpImputeConstant.R index a9dd12c67..4554a28bd 100644 --- a/R/PipeOpImputeConstant.R +++ b/R/PipeOpImputeConstant.R @@ -69,10 +69,10 @@ PipeOpImputeConstant = R6Class("PipeOpImputeConstant", inherit = PipeOpImpute, public = list( initialize = function(id = "imputeconstant", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( constant = p_uty(tags = c("train", "required"), custom_check = check_scalar), check_levels = p_lgl(tags = c("train", "required")) - )) + ) ps$values = list(constant = ".MISSING", check_levels = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered", "POSIXct")) } diff --git a/R/PipeOpImputeOOR.R b/R/PipeOpImputeOOR.R index b224cab32..d0cdd9d12 100644 --- a/R/PipeOpImputeOOR.R +++ b/R/PipeOpImputeOOR.R @@ -80,7 +80,7 @@ PipeOpImputeOOR = R6Class("PipeOpImputeOOR", inherit = PipeOpImpute, public = list( initialize = function(id = "imputeoor", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( min = p_lgl(tags = c("train", "predict")), offset = p_dbl(lower = 0, tags = c("train", "predict")), multiplier = p_dbl(lower = 0, tags = c("train", "predict"))) diff --git a/R/PipeOpKernelPCA.R b/R/PipeOpKernelPCA.R index da878e480..5c24b21d4 100644 --- a/R/PipeOpKernelPCA.R +++ b/R/PipeOpKernelPCA.R @@ -67,14 +67,14 @@ PipeOpKernelPCA = R6Class("PipeOpKernelPCA", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "kernelpca", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( kernel = p_fct(default = "rbfdot", levels = c("rbfdot", "polydot", "vanilladot", "tanhdot", "laplacedot", "besseldot", "anovadot", "splinedot"), tags = c("train", "kpca")), kpar = p_uty(tags = c("train", "kpca")), features = p_int(default = 0, lower = 0, tags = c("train", "kpca")), th = p_dbl(default = 1e-04, lower = 0, tags = c("train", "kpca")), na.action = p_uty(default = stats::na.omit, tags = c("train", "kpca")) - )) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "kernlab", feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index 2e6e829f7..3c2d0b4b1 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -124,11 +124,11 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", type = private$.learner$task_type task_type = mlr_reflections$task_types[type, mult = "first"]$task - private$.crossval_param_set = ParamSet$new(params = list( + private$.crossval_param_set = ps( method = p_fct(levels = c("cv", "insample"), tags = c("train", "required")), folds = p_int(lower = 2L, upper = Inf, tags = c("train", "required")), keep_response = p_lgl(tags = c("train", "required")) - )) + ) private$.crossval_param_set$values = list(method = "cv", folds = 3, keep_response = FALSE) if (paradox_info$is_old) { private$.crossval_param_set$set_id = "resampling" diff --git a/R/PipeOpMissingIndicators.R b/R/PipeOpMissingIndicators.R index bbc584a26..7e5d819dd 100644 --- a/R/PipeOpMissingIndicators.R +++ b/R/PipeOpMissingIndicators.R @@ -79,10 +79,10 @@ PipeOpMissInd = R6Class("PipeOpMissInd", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "missind", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( which = p_fct(levels = c("missing_train", "all"), tags = c("train", "required")), type = p_fct(levels = c("factor", "integer", "logical", "numeric"), tags = c("train", "predict", 
"required")) - )) + ) ps$values = list(which = "missing_train", type = "factor") super$initialize(id, ps, param_vals = param_vals, tags = "missings") if ("affect_columns" %nin% names(param_vals)) { diff --git a/R/PipeOpModelMatrix.R b/R/PipeOpModelMatrix.R index ebfa0e222..a9376c04e 100644 --- a/R/PipeOpModelMatrix.R +++ b/R/PipeOpModelMatrix.R @@ -59,9 +59,9 @@ PipeOpModelMatrix = R6Class("PipeOpModelMatrix", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "modelmatrix", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( formula = p_uty(tags = c("train", "predict"), custom_check = check_formula) - )) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "stats") } ), diff --git a/R/PipeOpMultiplicity.R b/R/PipeOpMultiplicity.R index 532583aa5..649943dda 100644 --- a/R/PipeOpMultiplicity.R +++ b/R/PipeOpMultiplicity.R @@ -260,9 +260,9 @@ PipeOpReplicate = R6Class("PipeOpReplicate", inherit = PipeOp, public = list( initialize = function(id = "replicate", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( reps = p_int(lower = 1, tags = c("train", "predict", "required")) - )) + ) ps$values = list(reps = 1) super$initialize(id, param_set = ps, param_vals = param_vals, input = data.table(name = "input", train = "*", predict = "*"), diff --git a/R/PipeOpMutate.R b/R/PipeOpMutate.R index 230f46e5f..26cbe145d 100644 --- a/R/PipeOpMutate.R +++ b/R/PipeOpMutate.R @@ -73,10 +73,10 @@ PipeOpMutate = R6Class("PipeOpMutate", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "mutate", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( mutation = p_uty(custom_check = check_mutation_formulae, tags = c("train", "predict", "required")), delete_originals = p_lgl(tags = c("train", "predict", "required")) - )) + ) ps$values = list(mutation = list(), delete_originals = FALSE) super$initialize(id, ps, param_vals = param_vals) } diff --git a/R/PipeOpNMF.R b/R/PipeOpNMF.R index c8a757311..e708af7ea 100644 --- a/R/PipeOpNMF.R +++ b/R/PipeOpNMF.R @@ -107,7 +107,7 @@ PipeOpNMF = R6Class("PipeOpNMF", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "nmf", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( rank = p_int(lower = 1L, upper = Inf, tags = c("train", "nmf")), method = p_fct(tags = c("train", "nmf"), levels = c("brunet", "lee", "ls-nmf", "nsNMF", "offset", "pe-nmf", "snmf/r", "snmf/l")), @@ -126,7 +126,7 @@ PipeOpNMF = R6Class("PipeOpNMF", verbose = p_uty(default = FALSE, tags = c("train", "nmf.options")), pbackend = p_uty(tags = c("train", "nmf")), # .pbackend callback = p_uty(tags = c("train", "nmf")) # .callback - )) + ) ps$add_dep("keep.all", on = "nrun", cond = CondLarger$new(1)) ps$add_dep("callback", on = "keep.all", cond = CondEqual$new(TRUE)) ps$values = list(rank = 2L, method = "brunet", parallel = FALSE, parallel.required = FALSE) diff --git a/R/PipeOpPCA.R b/R/PipeOpPCA.R index 1f9b557d3..606fc7b97 100644 --- a/R/PipeOpPCA.R +++ b/R/PipeOpPCA.R @@ -68,11 +68,11 @@ PipeOpPCA = R6Class("PipeOpPCA", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "pca", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( center = p_lgl(default = TRUE, tags = c("train", "pca")), scale. = p_lgl(default = FALSE, tags = c("train", "pca")), rank. 
= p_int(default = NULL, lower = 1, upper = Inf, special_vals = list(NULL), tags = c("train", "pca")) - )) + ) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } ), diff --git a/R/PipeOpProxy.R b/R/PipeOpProxy.R index 04e40ce28..5d7b91236 100644 --- a/R/PipeOpProxy.R +++ b/R/PipeOpProxy.R @@ -85,7 +85,7 @@ PipeOpProxy = R6Class("PipeOpProxy", assert_int(outnum, lower = 1L) # input can be a vararg input channel inname = if (innum) rep_suffix("input", innum) else "..." - ps = ParamSet$new(params = list( + ps = ps( content = p_uty(tags = c("train", "predidct", "required"), custom_check = function(x) { # content must be an object that can be coerced to a Graph and the output number must match tryCatch({ @@ -103,7 +103,7 @@ PipeOpProxy = R6Class("PipeOpProxy", }, error = function(error_condition) "`content` must be an object that can be converted to a Graph") }) - )) + ) ps$values = list(content = PipeOpFeatureUnion$new(innum = innum)) super$initialize(id, param_set = ps, param_vals = param_vals, input = data.table(name = inname, train = "*", predict = "*"), diff --git a/R/PipeOpQuantileBin.R b/R/PipeOpQuantileBin.R index ea9f67307..712657908 100644 --- a/R/PipeOpQuantileBin.R +++ b/R/PipeOpQuantileBin.R @@ -56,9 +56,9 @@ PipeOpQuantileBin = R6Class("PipeOpQuantileBin", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "quantilebin", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( numsplits = p_int(lower = 2, special_vals = list(NULL), tags = "train") - )) + ) ps$values = list(numsplits = 2L) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "stats", feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpRandomProjection.R b/R/PipeOpRandomProjection.R index 5d3bf7acb..4cdd2bcbb 100644 --- a/R/PipeOpRandomProjection.R +++ b/R/PipeOpRandomProjection.R @@ -70,9 +70,9 @@ PipeOpRandomProjection = R6Class("PipeOpRandomProjection", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "randomprojection", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( rank = p_int(lower = 0, tags = "train") - )) + ) ps$values = list(rank = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpRandomResponse.R b/R/PipeOpRandomResponse.R index 60630aa9f..d67054841 100644 --- a/R/PipeOpRandomResponse.R +++ b/R/PipeOpRandomResponse.R @@ -83,7 +83,7 @@ PipeOpRandomResponse = R6Class("PipeOpRandomResponse", inherit = PipeOp, public = list( initialize = function(id = "randomresponse", param_vals = list(), packages = character(0L)) { - ps = ParamSet$new(params = list( + ps = ps( rdistfun = p_uty(tags = c("predict", "required"), custom_check = function(x) { check_function(x, args = c("n", "mean", "sd")) }) diff --git a/R/PipeOpRemoveConstants.R b/R/PipeOpRemoveConstants.R index 7b09f442d..fb22ea249 100644 --- a/R/PipeOpRemoveConstants.R +++ b/R/PipeOpRemoveConstants.R @@ -63,12 +63,12 @@ PipeOpRemoveConstants = R6Class("PipeOpRemoveConstants", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "removeconstants", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( ratio = p_dbl(lower = 0, upper = 1, tags = c("train", "required", "constant_check")), rel_tol = p_dbl(lower = 0, tags = c("required", "constant_check", "train")), abs_tol = p_dbl(lower = 0, tags = c("required", "constant_check", "train")), na_ignore = p_lgl(tags = c("train", 
"required", "constant_check")) - )) + ) ps$values = list(ratio = 0, rel_tol = 1e-8, abs_tol = 1e-8, na_ignore = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, tags = "robustify") } diff --git a/R/PipeOpRenameColumns.R b/R/PipeOpRenameColumns.R index 359557f5a..bfdafbd4d 100644 --- a/R/PipeOpRenameColumns.R +++ b/R/PipeOpRenameColumns.R @@ -60,13 +60,13 @@ PipeOpRenameColumns = R6Class("PipeOpRenameColumns", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "renamecolumns", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( renaming = p_uty(tags = c("train", "predict", "required"), custom_check = function(x) { check_character(x, any.missing = FALSE, names = "strict") %check&&% check_names(x, type = "strict") }), ignore_missing = p_lgl(tags = c("train", "predict", "required")) - )) + ) ps$values = list(renaming = character(0), ignore_missing = FALSE) super$initialize(id, ps, param_vals = param_vals, can_subset_cols = FALSE) } diff --git a/R/PipeOpScale.R b/R/PipeOpScale.R index d97ef349a..8a5636c65 100644 --- a/R/PipeOpScale.R +++ b/R/PipeOpScale.R @@ -74,11 +74,11 @@ PipeOpScale = R6Class("PipeOpScale", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "scale", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( center = p_lgl(default = TRUE, tags = c("train", "scale")), scale = p_lgl(default = TRUE, tags = c("train", "scale")), robust = p_lgl(tags = c("train", "required")) - )) + ) ps$values = list(robust = FALSE) super$initialize(id = id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpScaleMaxAbs.R b/R/PipeOpScaleMaxAbs.R index cf5691491..a4abe2e32 100644 --- a/R/PipeOpScaleMaxAbs.R +++ b/R/PipeOpScaleMaxAbs.R @@ -54,9 +54,9 @@ PipeOpScaleMaxAbs = R6Class("PipeOpScaleMaxAbs", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "scalemaxabs", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( maxabs = p_dbl(lower = 0, tags = c("required", "train", "predict")) - )) + ) ps$values = list(maxabs = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpScaleRange.R b/R/PipeOpScaleRange.R index 8c9d583e2..e5f547c24 100644 --- a/R/PipeOpScaleRange.R +++ b/R/PipeOpScaleRange.R @@ -59,10 +59,10 @@ PipeOpScaleRange = R6Class("PipeOpScaleRange", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "scalerange", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( lower = p_dbl(tags = c("required", "train")), upper = p_dbl(tags = c("required", "train")) - )) + ) ps$values = list(lower = 0, upper = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpSelect.R b/R/PipeOpSelect.R index ec4e1b4b5..92721704b 100644 --- a/R/PipeOpSelect.R +++ b/R/PipeOpSelect.R @@ -69,9 +69,9 @@ PipeOpSelect = R6Class("PipeOpSelect", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "select", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( selector = p_uty(custom_check = check_function, tags = c("train", "required")) - )) + ) ps$values = list(selector = selector_all()) super$initialize(id, ps, param_vals = param_vals, tags = "feature selection") } diff --git a/R/PipeOpSmote.R b/R/PipeOpSmote.R index 9e7e0e8e7..9e512bbd0 100644 --- a/R/PipeOpSmote.R +++ b/R/PipeOpSmote.R @@ -71,12 +71,12 @@ 
PipeOpSmote = R6Class("PipeOpSmote", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "smote", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( K = p_int(lower = 1, default = 5, tags = c("train", "smote")), # dup_size = 0 leads to behaviour different from 1, 2, 3, ..., because it means "autodetect", # so it is a 'special_vals'. dup_size = p_int(lower = 1, default = 0, special_vals = list(0), tags = c("train", "smote")) - )) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "smotefamily", can_subset_cols = FALSE, tags = "imbalanced data") } diff --git a/R/PipeOpSpatialSign.R b/R/PipeOpSpatialSign.R index 464b74e68..0ac0559d4 100644 --- a/R/PipeOpSpatialSign.R +++ b/R/PipeOpSpatialSign.R @@ -55,10 +55,10 @@ PipeOpSpatialSign = R6Class("PipeOpSpatialSign", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "spatialsign", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( length = p_dbl(tags = c("train", "predict"), lower = 0), norm = p_dbl(tags = c("train", "predict"), lower = 0) - )) + ) ps$values = list(norm = 2, length = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpSubsample.R b/R/PipeOpSubsample.R index 881391545..3d657c2bc 100644 --- a/R/PipeOpSubsample.R +++ b/R/PipeOpSubsample.R @@ -64,11 +64,11 @@ PipeOpSubsample = R6Class("PipeOpSubsample", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "subsample", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( frac = p_dbl(lower = 0, upper = Inf, tags = "train"), stratify = p_lgl(tags = "train"), replace = p_lgl(tags = "train") - )) + ) ps$values = list(frac = 1 - exp(-1), stratify = FALSE, replace = FALSE) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE) } diff --git a/R/PipeOpTextVectorizer.R b/R/PipeOpTextVectorizer.R index eb3d4d4d1..2bc990776 100644 --- a/R/PipeOpTextVectorizer.R +++ b/R/PipeOpTextVectorizer.R @@ -165,7 +165,7 @@ PipeOpTextVectorizer = R6Class("PipeOpTextVectorizer", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "textvectorizer", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( stopwords_language = p_fct(tags = c("train", "predict"), levels = c("da", "de", "en", "es", "fi", "fr", "hu", "ir", "it", "nl", "no", "pt", "ro", "ru", "sv" , "smart", "none")), diff --git a/R/PipeOpTrafo.R b/R/PipeOpTrafo.R index c25ff95e0..0d27f2d86 100644 --- a/R/PipeOpTrafo.R +++ b/R/PipeOpTrafo.R @@ -348,10 +348,10 @@ PipeOpTargetMutate = R6Class("PipeOpTargetMutate", public = list( initialize = function(id = "targetmutate", param_vals = list(), new_task_type = NULL) { private$.new_task_type = assert_choice(new_task_type, mlr_reflections$task_types$type, null.ok = TRUE) - ps = ParamSet$new(params = list( + ps = ps( trafo = p_uty(tags = c("train", "predict"), custom_check = function(x) check_function(x, nargs = 1L)), inverter = p_uty(tags = "predict", custom_check = function(x) check_function(x, nargs = 1L)) - )) + ) # We could add a condition here for new_task_type on trafo and inverter when mlr-org/paradox#278 has an answer. 
# HOWEVER conditions are broken in paradox, it is a terrible idea to use them in PipeOps, # see https://github.com/mlr-org/paradox/issues/216 and related comment in PipeOpLearnerCV @@ -457,10 +457,10 @@ PipeOpTargetTrafoScaleRange = R6Class("PipeOpTargetTrafoScaleRange", inherit = PipeOpTargetTrafo, public = list( initialize = function(id = "targettrafoscalerange", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( lower = p_dbl(tags = c("required", "train")), upper = p_dbl(tags = c("required", "train")) - )) + ) ps$values = list(lower = 0, upper = 1) super$initialize(id = id, param_set = ps, param_vals = param_vals, task_type_in = "TaskRegr") } @@ -565,7 +565,7 @@ PipeOpUpdateTarget = R6Class("PipeOpUpdateTarget", inherit = PipeOp, public = list( initialize = function(id = "update_target", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( trafo = p_uty(tags = c("train", "predict"), custom_check = function(x) check_function(x, nargs = 1L)), new_target_name = p_uty(tags = c("train", "predict"), custom_check = function(x) check_character(x, any.missing = FALSE, len = 1L)), new_task_type = p_uty(tags = c("train", "predict"), custom_check = function(x) check_choice(x, choices = mlr_reflections$task_types$type)), diff --git a/R/PipeOpTuneThreshold.R b/R/PipeOpTuneThreshold.R index 47a182e16..e020db4ad 100644 --- a/R/PipeOpTuneThreshold.R +++ b/R/PipeOpTuneThreshold.R @@ -75,12 +75,12 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold", public = list( initialize = function(id = "tunethreshold", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( measure = p_uty(custom_check = check_class_or_character("Measure", mlr_measures), tags = "train"), optimizer = p_uty(custom_check = check_optimizer, tags = "train"), log_level = p_uty(tags = "train", function(x) check_string(x) %check||% check_integerish(x)) - )) + ) ps$values = list(measure = "classif.ce", optimizer = "gensa", log_level = "warn") super$initialize(id, param_set = ps, param_vals = param_vals, packages = "bbotk", input = data.table(name = "input", train = "Task", predict = "Task"), diff --git a/R/PipeOpVtreat.R b/R/PipeOpVtreat.R index abc8cdff5..473723ebd 100644 --- a/R/PipeOpVtreat.R +++ b/R/PipeOpVtreat.R @@ -128,7 +128,7 @@ PipeOpVtreat = R6Class("PipeOpVtreat", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "vtreat", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( recommended = p_lgl(tags = c("train", "predict")), cols_to_copy = p_uty(custom_check = checkmate::check_function, tags = c("train", "predict")), # tags stand for: regression vtreat::regression_parameters() / classification vtreat::classification_parameters() / multinomial vtreat::multinomial_parameters() @@ -159,7 +159,7 @@ PipeOpVtreat = R6Class("PipeOpVtreat", # NOTE: imputation_map is also in multinomial_parameters(); this is redundant so only include it here imputation_map = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "predict")) # NOTE: parallelCluster missing intentionally and will be set to NULL - )) + ) ps$add_dep("collarProb", on = "doCollar", cond = CondEqual$new(TRUE)) ps$values = list(recommended = TRUE, cols_to_copy = selector_none()) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "vtreat", tags = c("encode", "missings")) diff --git a/R/PipeOpYeoJohnson.R b/R/PipeOpYeoJohnson.R index 4b1727ad5..99d309f7c 100644 --- a/R/PipeOpYeoJohnson.R +++ b/R/PipeOpYeoJohnson.R @@ 
-67,12 +67,12 @@ PipeOpYeoJohnson = R6Class("PipeOpYeoJohnson", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "yeojohnson", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( eps = p_dbl(default = 0.001, lower = 0, tags = c("train", "yj")), standardize = p_lgl(default = TRUE, tags = c("train", "yj")), lower = p_dbl(tags = c("train", "yj")), upper = p_dbl(tags = c("train", "yj")) - )) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "bestNormalize", feature_types = c("numeric", "integer")) } From 13fe0d316485d394c455119047041fdc75e77da7 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 13 Jan 2024 23:43:26 +0100 Subject: [PATCH 13/46] some manual fixes --- R/PipeOpClassBalancing.R | 6 ++---- R/PipeOpImputeOOR.R | 2 +- R/PipeOpRandomResponse.R | 5 ++--- R/PipeOpTextVectorizer.R | 2 +- R/PipeOpTrafo.R | 1 - tests/testthat/helper_test_pipeops.R | 9 ++++++--- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/R/PipeOpClassBalancing.R b/R/PipeOpClassBalancing.R index bbb63faeb..40be0b558 100644 --- a/R/PipeOpClassBalancing.R +++ b/R/PipeOpClassBalancing.R @@ -106,10 +106,8 @@ PipeOpClassBalancing = R6Class("PipeOpClassBalancing", initialize = function(id = "classbalancing", param_vals = list()) { ps = ps( ratio = p_dbl(lower = 0, upper = Inf, tags = "train"), - ParamFct$new("reference", - levels = c("all", "major", "minor", "nonmajor", "nonminor", "one"), tags = "train"), - ParamFct$new("adjust", - levels = c("all", "major", "minor", "nonmajor", "nonminor", "upsample", "downsample"), tags = "train"), + reference = p_fct(c("all", "major", "minor", "nonmajor", "nonminor", "one"), tags = "train"), + adjust = p_fct(c("all", "major", "minor", "nonmajor", "nonminor", "upsample", "downsample"), tags = "train"), shuffle = p_lgl(tags = "train") ) ps$values = list(ratio = 1, reference = "all", adjust = "all", shuffle = TRUE) diff --git a/R/PipeOpImputeOOR.R b/R/PipeOpImputeOOR.R index d0cdd9d12..456777ce9 100644 --- a/R/PipeOpImputeOOR.R +++ b/R/PipeOpImputeOOR.R @@ -83,7 +83,7 @@ PipeOpImputeOOR = R6Class("PipeOpImputeOOR", ps = ps( min = p_lgl(tags = c("train", "predict")), offset = p_dbl(lower = 0, tags = c("train", "predict")), - multiplier = p_dbl(lower = 0, tags = c("train", "predict"))) + multiplier = p_dbl(lower = 0, tags = c("train", "predict")) ) ps$values = list(min = TRUE, offset = 1, multiplier = 1) # this is one of the few imputers that handles 'character' features! 
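# Aside on the comment above (imputeoor is one of the few imputers that also
# handles 'character' features): a rough end-to-end sketch, assuming the mlr3
# "penguins" task. Factor-like columns receive a new out-of-range level such as
# ".MISSING", numeric columns a value below the observed minimum; illustrative only.
library(mlr3)
library(mlr3pipelines)
task = tsk("penguins")                           # contains missing values
imputed = po("imputeoor")$train(list(task))[[1]]
imputed$missings()                               # feature columns should now report 0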
diff --git a/R/PipeOpRandomResponse.R b/R/PipeOpRandomResponse.R index d67054841..b883c8858 100644 --- a/R/PipeOpRandomResponse.R +++ b/R/PipeOpRandomResponse.R @@ -84,10 +84,9 @@ PipeOpRandomResponse = R6Class("PipeOpRandomResponse", public = list( initialize = function(id = "randomresponse", param_vals = list(), packages = character(0L)) { ps = ps( - rdistfun = p_uty(tags = c("predict", "required"), custom_check = function(x) { + rdistfun = p_uty(tags = c("predict", "required"), custom_check = crate(function(x) { check_function(x, args = c("n", "mean", "sd")) - }) - ) + })) ) ps$values = list(rdistfun = stats::rnorm) super$initialize(id = id, param_set = ps, param_vals = param_vals, packages = packages, diff --git a/R/PipeOpTextVectorizer.R b/R/PipeOpTextVectorizer.R index 2bc990776..8c4437fc6 100644 --- a/R/PipeOpTextVectorizer.R +++ b/R/PipeOpTextVectorizer.R @@ -209,7 +209,7 @@ PipeOpTextVectorizer = R6Class("PipeOpTextVectorizer", return_type = p_fct(levels = c("bow", "integer_sequence", "factor_sequence"), tags = c("train", "predict")), sequence_length = p_int(default = 0, lower = 0, upper = Inf, tags = c("train", "predict", "integer_sequence")) - ))$ + )$ add_dep("base_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ add_dep("smoothing_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ add_dep("k_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ diff --git a/R/PipeOpTrafo.R b/R/PipeOpTrafo.R index 0d27f2d86..097742a04 100644 --- a/R/PipeOpTrafo.R +++ b/R/PipeOpTrafo.R @@ -570,7 +570,6 @@ PipeOpUpdateTarget = R6Class("PipeOpUpdateTarget", new_target_name = p_uty(tags = c("train", "predict"), custom_check = function(x) check_character(x, any.missing = FALSE, len = 1L)), new_task_type = p_uty(tags = c("train", "predict"), custom_check = function(x) check_choice(x, choices = mlr_reflections$task_types$type)), drop_original_target = p_lgl(tags = c("train", "predict")) - ) ) ps$values = list(trafo = identity, drop_original_target = TRUE) super$initialize(id = id, param_set = ps, param_vals = param_vals, diff --git a/tests/testthat/helper_test_pipeops.R b/tests/testthat/helper_test_pipeops.R index 921d463fb..bc211374f 100644 --- a/tests/testthat/helper_test_pipeops.R +++ b/tests/testthat/helper_test_pipeops.R @@ -15,7 +15,8 @@ PipeOpDebugBasic = R6Class("PipeOpDebugBasic", .predict = function(inputs) { catf("Predicting %s", self$id) self$state = c(self$state, inputs) - } + }, + .additional_phash_input = function() NULL ) ) @@ -60,7 +61,8 @@ PipeOpDebugMulti = R6Class("PipeOpDebugMulti", self$id, deparse_list_safe(inputs), deparse_list_safe(self$state)) iin = inputs[[1]] as.list(iin + seq_len(self$nout)) - } + }, + .additional_phash_input = function() c(self$nin, self$nout) ) ) @@ -81,6 +83,7 @@ VarargPipeop = R6Class("VarargPipeop", .predict = function(inputs) { self$state = inputs list(inputs) - } + }, + .additional_phash_input = function() self$input$name ) ) From 5717a020b4b0321257571937cbeedec158a71992 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 00:23:53 +0100 Subject: [PATCH 14/46] more adjustments --- R/LearnerAvg.R | 9 ++++----- R/PipeOp.R | 4 ++-- R/PipeOpBranch.R | 6 +++--- R/PipeOpEnsemble.R | 4 ++-- R/PipeOpFilter.R | 2 +- R/PipeOpImpute.R | 18 +++++++++++------- R/PipeOpImputeLearner.R | 2 +- R/PipeOpLearner.R | 2 +- R/PipeOpLearnerCV.R | 2 +- R/PipeOpTaskPreproc.R | 16 ++++++++++------ R/PipeOpThreshold.R | 2 +- R/PipeOpTrafo.R | 6 +++--- R/PipeOpTuneThreshold.R | 3 ++- R/zzz.R | 
2 +- tests/testthat/helper_test_pipeops.R | 6 +++--- tests/testthat/test_po.R | 4 ++-- 16 files changed, 48 insertions(+), 40 deletions(-) diff --git a/R/LearnerAvg.R b/R/LearnerAvg.R index 727da1f86..6e9d53c45 100644 --- a/R/LearnerAvg.R +++ b/R/LearnerAvg.R @@ -185,10 +185,9 @@ optimize_weights_learneravg = function(self, task, n_weights, data) { } pars = self$param_set$get_values(tags = "train") - ps = ParamSet$new(params = imap(data, function(x, n) { - if (is.numeric(n)) n = paste0("w.", n) - ParamDbl$new(id = n, lower = 0, upper = 1) - })) + pl = rep(list(p_dbl(0, 1)), length(data)) + names(pl) = names(data) %??% paste0("w.", seq_along(data)) + ps = do.call(ps, pl) optimizer = pars$optimizer if (inherits(optimizer, "character")) { optimizer = bbotk::opt(optimizer) @@ -198,7 +197,7 @@ optimize_weights_learneravg = function(self, task, n_weights, data) { } measure = pars$measure if (is.character(measure)) measure = msr(measure) - codomain = ParamSet$new(params = list(ParamDbl$new(id = measure$id, tags = ifelse(measure$minimize, "minimize", "maximize")))) + codomain = do.call(paradox::ps, structure(list(p_dbl(tags = ifelse(measure$minimize, "minimize", "maximize"))), names = measure$id)) objfun = bbotk::ObjectiveRFun$new( fun = function(xs) learneravg_objfun(xs, task = task, measure = measure, avg_weight_fun = self$weighted_average_prediction, data = data), domain = ps, codomain = codomain diff --git a/R/PipeOp.R b/R/PipeOp.R index 37b84e600..9c8821f57 100644 --- a/R/PipeOp.R +++ b/R/PipeOp.R @@ -38,7 +38,7 @@ #' #' @section Construction: #' ``` -#' PipeOp$new(id, param_set = ParamSet$new(), param_vals = list(), input, output, packages = character(0), tags = character(0)) +#' PipeOp$new(id, param_set = ps(), param_vals = list(), input, output, packages = character(0), tags = character(0)) #' ``` #' #' * `id` :: `character(1)`\cr @@ -236,7 +236,7 @@ PipeOp = R6Class("PipeOp", .result = NULL, tags = NULL, - initialize = function(id, param_set = ParamSet$new(), param_vals = list(), input, output, packages = character(0), tags = "abstract") { + initialize = function(id, param_set = ps(), param_vals = list(), input, output, packages = character(0), tags = "abstract") { if (inherits(param_set, "ParamSet")) { private$.param_set = assert_param_set(param_set) private$.param_set_source = NULL diff --git a/R/PipeOpBranch.R b/R/PipeOpBranch.R index 1952125f0..719505278 100644 --- a/R/PipeOpBranch.R +++ b/R/PipeOpBranch.R @@ -90,14 +90,14 @@ PipeOpBranch = R6Class("PipeOpBranch", ) if (is.numeric(options)) { options = round(options) - param = ParamInt$new("selection", lower = 1L, upper = options, tags = c("train", "predict", "required")) + param = p_int(lower = 1L, upper = options, tags = c("train", "predict", "required")) options = rep_suffix("output", options) initval = 1 } else { - param = ParamFct$new("selection", levels = options, tags = c("train", "predict", "required")) + param = p_fct(options, tags = c("train", "predict", "required")) initval = options[1] } - ps = ParamSet$new(params = list(param)) + ps = ps(selection = param) ps$values$selection = initval super$initialize(id, ps, param_vals, input = data.table(name = "input", train = "*", predict = "*"), diff --git a/R/PipeOpEnsemble.R b/R/PipeOpEnsemble.R index 255237a41..4a4e1f52e 100644 --- a/R/PipeOpEnsemble.R +++ b/R/PipeOpEnsemble.R @@ -10,7 +10,7 @@ #' @section Construction: #' Note: This object is typically constructed via a derived class, e.g. [`PipeOpClassifAvg`] or [`PipeOpRegrAvg`]. 
#' ``` -#' PipeOpEnsemble$new(innum = 0, collect_multiplicity = FALSE, id, param_set = ParamSet$new(), param_vals = list(), packages = character(0), prediction_type = "Prediction") +#' PipeOpEnsemble$new(innum = 0, collect_multiplicity = FALSE, id, param_set = ps(), param_vals = list(), packages = character(0), prediction_type = "Prediction") #' ``` #' #' * `innum` :: `numeric(1)`\cr @@ -82,7 +82,7 @@ PipeOpEnsemble = R6Class("PipeOpEnsemble", inherit = PipeOp, public = list( - initialize = function(innum = 0, collect_multiplicity = FALSE, id, param_set = ParamSet$new(), param_vals = list(), packages = character(0), prediction_type = "Prediction", tags = NULL) { + initialize = function(innum = 0, collect_multiplicity = FALSE, id, param_set = ps(), param_vals = list(), packages = character(0), prediction_type = "Prediction", tags = NULL) { assert_integerish(innum, lower = 0) param_set$add(ParamUty$new("weights", custom_check = check_weights(innum), tags = "predict")) param_set$values$weights = 1 diff --git a/R/PipeOpFilter.R b/R/PipeOpFilter.R index 1049bab3a..d8657dce4 100644 --- a/R/PipeOpFilter.R +++ b/R/PipeOpFilter.R @@ -195,4 +195,4 @@ PipeOpFilter = R6Class("PipeOpFilter", ) ) -mlr_pipeops$add("filter", PipeOpFilter, list(R6Class("Filter", public = list(id = "dummyfilter", param_set = ParamSet$new()))$new())) +mlr_pipeops$add("filter", PipeOpFilter, list(R6Class("Filter", public = list(id = "dummyfilter", param_set = ps()))$new())) diff --git a/R/PipeOpImpute.R b/R/PipeOpImpute.R index 3e8f52906..ea21018f2 100644 --- a/R/PipeOpImpute.R +++ b/R/PipeOpImpute.R @@ -8,7 +8,7 @@ #' #' @section Construction: #' ``` -#' PipeOpImpute$$new(id, param_set = ParamSet$new(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task") +#' PipeOpImpute$$new(id, param_set = ps(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task") #' ``` #' #' * `id` :: `character(1)`\cr @@ -110,18 +110,22 @@ PipeOpImpute = R6Class("PipeOpImpute", inherit = PipeOp, public = list( - initialize = function(id, param_set = ParamSet$new(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task", feature_types = mlr_reflections$task_feature_types) { + initialize = function(id, param_set = ps(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task", feature_types = mlr_reflections$task_feature_types) { # add one or two parameters: affect_columns (always) and context_columns (if whole_task_dependent is TRUE) - addparams = list(ParamUty$new("affect_columns", custom_check = check_function_or_null, tags = "train")) + addparams = list(affect_columns = p_uty(custom_check = check_function_or_null, tags = "train")) if (whole_task_dependent) { - addparams = c(addparams, list(ParamUty$new("context_columns", custom_check = check_function_or_null, tags = "train"))) + addparams = c(addparams, list(context_columns = p_uty(custom_check = check_function_or_null, tags = "train"))) } - + affectcols_ps = do.call(ps, addparams) # ParamSetCollection handles adding of new parameters differently if (inherits(param_set, "ParamSet")) { - lapply(addparams, param_set$add) + if (paradox_info$is_old) { + lapply(affectcols_ps$params, param_set$add) + } else { + param_set = c(param_set, affectcols_ps) + } } else { - private$.affectcols_ps = ParamSet$new(addparams) + private$.affectcols_ps = affectcols_ps param_set = c(param_set, alist(private$.affectcols_ps)) } private$.feature_types = 
assert_subset(feature_types, mlr_reflections$task_feature_types) diff --git a/R/PipeOpImputeLearner.R b/R/PipeOpImputeLearner.R index 8c29b0a14..3ff7662f1 100644 --- a/R/PipeOpImputeLearner.R +++ b/R/PipeOpImputeLearner.R @@ -198,7 +198,7 @@ PipeOpImputeLearner = R6Class("PipeOpImputeLearner", ) ) -mlr_pipeops$add("imputelearner", PipeOpImputeLearner, list(R6Class("Learner", public = list(id = "learner", task_type = "classif", param_set = ParamSet$new()))$new())) +mlr_pipeops$add("imputelearner", PipeOpImputeLearner, list(R6Class("Learner", public = list(id = "learner", task_type = "classif", param_set = ps()))$new())) # See mlr-org/mlr#470 convert_to_task = function(id = "imputing", data, target, task_type, ...) { diff --git a/R/PipeOpLearner.R b/R/PipeOpLearner.R index d8797a976..940dfa555 100644 --- a/R/PipeOpLearner.R +++ b/R/PipeOpLearner.R @@ -155,4 +155,4 @@ PipeOpLearner = R6Class("PipeOpLearner", inherit = PipeOp, ) ) -mlr_pipeops$add("learner", PipeOpLearner, list(R6Class("Learner", public = list(id = "learner", task_type = "classif", param_set = ParamSet$new(), packages = "mlr3pipelines"))$new())) +mlr_pipeops$add("learner", PipeOpLearner, list(R6Class("Learner", public = list(id = "learner", task_type = "classif", param_set = ps(), packages = "mlr3pipelines"))$new())) diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index 3c2d0b4b1..994e1045a 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -222,4 +222,4 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", ) ) -mlr_pipeops$add("learner_cv", PipeOpLearnerCV, list(R6Class("Learner", public = list(id = "learner_cv", task_type = "classif", param_set = ParamSet$new()))$new())) +mlr_pipeops$add("learner_cv", PipeOpLearnerCV, list(R6Class("Learner", public = list(id = "learner_cv", task_type = "classif", param_set = ps()))$new())) diff --git a/R/PipeOpTaskPreproc.R b/R/PipeOpTaskPreproc.R index c87517644..892076c32 100644 --- a/R/PipeOpTaskPreproc.R +++ b/R/PipeOpTaskPreproc.R @@ -36,7 +36,7 @@ #' #' @section Construction: #' ``` -#' PipeOpTaskPreproc$new(id, param_set = ParamSet$new(), param_vals = list(), can_subset_cols = TRUE, +#' PipeOpTaskPreproc$new(id, param_set = ps(), param_vals = list(), can_subset_cols = TRUE, #' packages = character(0), task_type = "Task", tags = NULL, feature_types = mlr_reflections$task_feature_types) #' ``` #' @@ -168,14 +168,18 @@ PipeOpTaskPreproc = R6Class("PipeOpTaskPreproc", inherit = PipeOp, public = list( - initialize = function(id, param_set = ParamSet$new(), param_vals = list(), can_subset_cols = TRUE, + initialize = function(id, param_set = ps(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task", tags = NULL, feature_types = mlr_reflections$task_feature_types) { if (can_subset_cols) { - acp = ParamUty$new("affect_columns", custom_check = check_function_or_null, default = selector_all(), tags = "train") + affectcols_ps = ps(affect_columns = p_uty(custom_check = check_function_or_null, default = selector_all(), tags = "train")) if (inherits(param_set, "ParamSet")) { - param_set$add(acp) + if (paradox_info$is_old) { + lapply(affectcols_ps$params, param_set$add) + } else { + param_set = c(param_set, affectcols_ps) + } } else { - private$.affectcols_ps = ParamSet$new(params = list(acp)) + private$.affectcols_ps = affectcols_ps param_set = c(param_set, alist(private$.affectcols_ps)) } } @@ -318,7 +322,7 @@ PipeOpTaskPreproc = R6Class("PipeOpTaskPreproc", #' #' @section Construction: #' ``` -#' PipeOpTaskPreprocSimple$new(id, param_set = 
ParamSet$new(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task") +#' PipeOpTaskPreprocSimple$new(id, param_set = ps(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task") #' ``` #' (Construction is identical to [`PipeOpTaskPreproc`].) #' diff --git a/R/PipeOpThreshold.R b/R/PipeOpThreshold.R index 61db915ff..94e69e71a 100644 --- a/R/PipeOpThreshold.R +++ b/R/PipeOpThreshold.R @@ -56,7 +56,7 @@ PipeOpThreshold = R6Class("PipeOpThreshold", inherit = PipeOp, public = list( initialize = function(id = "threshold", param_vals = list()) { - param_set = ParamSet$new() + param_set = ps() param_set$add(ParamUty$new("thresholds", custom_check = check_numeric_valid_threshold, tags = "predict")) param_set$values$thresholds = 0.5 super$initialize(id, param_set = param_set, param_vals = param_vals, packages = character(0), diff --git a/R/PipeOpTrafo.R b/R/PipeOpTrafo.R index 097742a04..101256cd0 100644 --- a/R/PipeOpTrafo.R +++ b/R/PipeOpTrafo.R @@ -15,7 +15,7 @@ #' #' @section Construction: #' ``` -#' PipeOpTargetTrafo$new(id, param_set = ParamSet$new(), param_vals = list() packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) +#' PipeOpTargetTrafo$new(id, param_set = ps(), param_vals = list() packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) #' ``` #' #' * `id` :: `character(1)`\cr @@ -118,7 +118,7 @@ PipeOpTargetTrafo = R6Class("PipeOpTargetTrafo", inherit = PipeOp, public = list( - initialize = function(id, param_set = ParamSet$new(), param_vals = list(), packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) { + initialize = function(id, param_set = ps(), param_vals = list(), packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) { super$initialize(id = id, param_set = param_set, param_vals = param_vals, input = data.table(name = "input", train = task_type_in, predict = task_type_in), output = data.table(name = c("fun", "output"), train = c("NULL", task_type_out), predict = c("function", task_type_out)), @@ -515,7 +515,7 @@ mlr_pipeops$add("targettrafoscalerange", PipeOpTargetTrafoScaleRange) #' #' @section Construction: #' ``` -#' PipeOpUpdateTarget$new(id, param_set = ParamSet$new(), +#' PipeOpUpdateTarget$new(id, param_set = ps(), #' param_vals = list(), packages = character(0)) #' ``` #' diff --git a/R/PipeOpTuneThreshold.R b/R/PipeOpTuneThreshold.R index e020db4ad..440e2b6df 100644 --- a/R/PipeOpTuneThreshold.R +++ b/R/PipeOpTuneThreshold.R @@ -120,7 +120,8 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold", ps = private$.make_param_set(pred) measure = self$param_set$values$measure if (is.character(measure)) measure = msr(measure) else measure - codomain = ParamSet$new(params = list(ParamDbl$new(id = measure$id, tags = ifelse(measure$minimize, "minimize", "maximize")))) + codomain = do.call(ps, structure(list(p_dbl(tags = ifelse(measure$minimize, "minimize", "maximize"))), names = measure$id)) + objfun = bbotk::ObjectiveRFun$new( fun = function(xs) private$.objfun(xs, pred = pred, measure = measure), domain = ps, codomain = codomain diff --git a/R/zzz.R b/R/zzz.R index f811affc9..218cc0548 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -42,4 +42,4 @@ paradox_info <- list2env(list(is_old = FALSE), parent = emptyenv()) # static code checks should not complain about commonly used data.table columns utils::globalVariables(c("src_id", "dst_id", "name", "op.id", "response", 
"truth")) -# leanify_package() +leanify_package() diff --git a/tests/testthat/helper_test_pipeops.R b/tests/testthat/helper_test_pipeops.R index bc211374f..0589a0791 100644 --- a/tests/testthat/helper_test_pipeops.R +++ b/tests/testthat/helper_test_pipeops.R @@ -1,7 +1,7 @@ PipeOpDebugBasic = R6Class("PipeOpDebugBasic", inherit = PipeOp, public = list( - initialize = function(id = "debug.basic", param_set = ParamSet$new()) { + initialize = function(id = "debug.basic", param_set = ps()) { super$initialize(id = id, param_set = param_set, input = data.table(name = "input", train = "*", predict = "*"), output = data.table(name = "output", train = "*", predict = "*") @@ -42,10 +42,10 @@ PipeOpDebugMulti = R6Class("PipeOpDebugMulti", if (is.numeric(outputs)) { outputs = paste0("output_", seq_len(outputs)) } - p = ParamInt$new(id = "par", lower = 0, upper = 10, default = 0, tags = c("train", "predict")) + p = ps(par = p_int(lower = 0, upper = 10, default = 0, tags = c("train", "predict"))) self$nin = length(inputs) self$nout = length(outputs) - super$initialize(id, ParamSet$new(list(p)), + super$initialize(id, p), input = data.table(name = inputs, train = "*", predict = "*"), output = data.table(name = outputs, train = "*", predict = "*")) }), diff --git a/tests/testthat/test_po.R b/tests/testthat/test_po.R index fb6fc0559..3438330d3 100644 --- a/tests/testthat/test_po.R +++ b/tests/testthat/test_po.R @@ -55,7 +55,7 @@ test_that("mlr_pipeops access works", { dblrn = R6Class("debuglearn", inherit = LearnerClassif, public = list( initialize = function() { - super$initialize(id = "debuglearn", param_set = paradox::ParamSet$new()$add(paradox::ParamDbl$new("key"))) + super$initialize(id = "debuglearn", param_set = ps(key = p_dbl())) } ) ) @@ -159,7 +159,7 @@ test_that("mlr_pipeops multi-access works", { dblrn = R6Class("debuglearn", inherit = LearnerClassif, public = list( initialize = function() { - super$initialize(id = "debuglearn", param_set = paradox::ParamSet$new()$add(paradox::ParamDbl$new("key"))) + super$initialize(id = "debuglearn", param_set = ps(key = p_dbl())) } ) ) From 7b5afd83c5e4c6bb99e740cbf8b4578da5c946ff Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 00:42:46 +0100 Subject: [PATCH 15/46] dependencies with new syntax --- R/PipeOpNMF.R | 19 ++++++++-------- R/PipeOpTextVectorizer.R | 33 +++++++++++----------------- R/PipeOpTuneThreshold.R | 2 +- R/PipeOpVtreat.R | 3 +-- tests/testthat/helper_test_pipeops.R | 2 +- 5 files changed, 25 insertions(+), 34 deletions(-) diff --git a/R/PipeOpNMF.R b/R/PipeOpNMF.R index e708af7ea..aa2beb689 100644 --- a/R/PipeOpNMF.R +++ b/R/PipeOpNMF.R @@ -125,10 +125,8 @@ PipeOpNMF = R6Class("PipeOpNMF", track = p_lgl(default = FALSE, tags = c("train", "nmf.options")), verbose = p_uty(default = FALSE, tags = c("train", "nmf.options")), pbackend = p_uty(tags = c("train", "nmf")), # .pbackend - callback = p_uty(tags = c("train", "nmf")) # .callback + callback = p_uty(tags = c("train", "nmf"), depends = keep.all == TRUE) # .callback ) - ps$add_dep("keep.all", on = "nrun", cond = CondLarger$new(1)) - ps$add_dep("callback", on = "keep.all", cond = CondEqual$new(TRUE)) ps$values = list(rank = 2L, method = "brunet", parallel = FALSE, parallel.required = FALSE) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer"), packages = c("MASS", "NMF")) } @@ -179,10 +177,11 @@ PipeOpNMF = R6Class("PipeOpNMF", mlr_pipeops$add("nmf", PipeOpNMF) -CondLarger = R6Class("CondLarger", inherit = Condition, - public = list( 
- initialize = function(rhs) super$initialize("larger", rhs), - test = function(x) !is.na(x) & x > self$rhs, - as_string = function(lhs_chr = "x") sprintf("%s > %s", lhs_chr, as.character(self$rhs)) - ) -) +# this is just a really bad idea +## CondLarger = R6Class("CondLarger", inherit = Condition, +## public = list( +## initialize = function(rhs) super$initialize("larger", rhs), +## test = function(x) !is.na(x) & x > self$rhs, +## as_string = function(lhs_chr = "x") sprintf("%s > %s", lhs_chr, as.character(self$rhs)) +## ) +## ) diff --git a/R/PipeOpTextVectorizer.R b/R/PipeOpTextVectorizer.R index 8c4437fc6..eb1531714 100644 --- a/R/PipeOpTextVectorizer.R +++ b/R/PipeOpTextVectorizer.R @@ -187,7 +187,8 @@ PipeOpTextVectorizer = R6Class("PipeOpTextVectorizer", skip = p_uty(default = 0, tags = c("train", "predict", "ngrams"), custom_check = curry(check_integerish, min.len = 1, lower = 0, any.missing = FALSE)), sparsity = p_dbl(lower = 0, upper = 1, default = NULL, - tags = c("train", "dfm_trim"), special_vals = list(NULL)), + tags = c("train", "dfm_trim"), special_vals = list(NULL), + depends = return_type == "bow"), termfreq_type = p_fct(default = "count", tags = c("train", "dfm_trim"), levels = c("count", "prop", "rank", "quantile")), min_termfreq = p_dbl(lower = 0, default = NULL, @@ -197,29 +198,21 @@ PipeOpTextVectorizer = R6Class("PipeOpTextVectorizer", scheme_df = p_fct(default = "count", tags = c("train", "docfreq"), levels = c("count", "inverse", "inversemax", "inverseprob", "unary")), - smoothing_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq")), - k_df = p_dbl(lower = 0, tags = c("train", "docfreq")), - threshold_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq")), - base_df = p_dbl(lower = 0, default = 10, tags = c("train", "docfreq")), + smoothing_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq"), depends = scheme_df %in% c("inverse", "inversemax", "inverseprob")), + k_df = p_dbl(lower = 0, tags = c("train", "docfreq"), depends = scheme_df %in% c("inverse", "inversemax", "inverseprob")), + threshold_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq"), depends = scheme_df == "count"), + base_df = p_dbl(lower = 0, default = 10, tags = c("train", "docfreq"), + depends = scheme_df %in% c("inverse", "inversemax", "inverseprob")), - scheme_tf = p_fct(default = "count", tags = c("train", "predict", "dfm_weight"), + scheme_tf = p_fct(default = "count", tags = c("train", "predict", "dfm_weight", depends = return_type == "bow"), levels = c("count", "prop", "propmax", "logcount", "boolean", "augmented", "logave")), - k_tf = p_dbl(lower = 0, upper = 1, tags = c("train", "predict", "dfm_weight")), - base_tf = p_dbl(lower = 0, default = 10, tags = c("train", "predict", "dfm_weight")), + k_tf = p_dbl(lower = 0, upper = 1, tags = c("train", "predict", "dfm_weight"), depends = scheme_tf == "augmented"), + base_tf = p_dbl(lower = 0, default = 10, tags = c("train", "predict", "dfm_weight"), depends = scheme_tf %in% c("logcount", "logave")), return_type = p_fct(levels = c("bow", "integer_sequence", "factor_sequence"), tags = c("train", "predict")), - sequence_length = p_int(default = 0, lower = 0, upper = Inf, tags = c("train", "predict", "integer_sequence")) - )$ - add_dep("base_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("smoothing_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("k_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - 
add_dep("base_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("threshold_df", "scheme_df", CondEqual$new("count"))$ - add_dep("k_tf", "scheme_tf", CondEqual$new("augmented"))$ - add_dep("base_tf", "scheme_tf", CondAnyOf$new(c("logcount", "logave")))$ - add_dep("scheme_tf", "return_type", CondEqual$new("bow"))$ - add_dep("sparsity", "return_type", CondEqual$new("bow"))$ - add_dep("sequence_length", "return_type", CondAnyOf$new(c("integer_sequence", "factor_sequence"))) + sequence_length = p_int(default = 0, lower = 0, upper = Inf, tags = c("train", "predict", "integer_sequence"), + depends = return_type %in% c("integer_sequence", "factor_sequence")) + ) ps$values = list(stopwords_language = "smart", extra_stopwords = character(0), n = 1, scheme_df = "unary", return_type = "bow") super$initialize(id = id, param_set = ps, param_vals = param_vals, packages = c("quanteda", "stopwords"), feature_types = "character") diff --git a/R/PipeOpTuneThreshold.R b/R/PipeOpTuneThreshold.R index 440e2b6df..e4891eb6f 100644 --- a/R/PipeOpTuneThreshold.R +++ b/R/PipeOpTuneThreshold.R @@ -120,7 +120,7 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold", ps = private$.make_param_set(pred) measure = self$param_set$values$measure if (is.character(measure)) measure = msr(measure) else measure - codomain = do.call(ps, structure(list(p_dbl(tags = ifelse(measure$minimize, "minimize", "maximize"))), names = measure$id)) + codomain = do.call(paradox::ps, structure(list(p_dbl(tags = ifelse(measure$minimize, "minimize", "maximize"))), names = measure$id)) objfun = bbotk::ObjectiveRFun$new( fun = function(xs) private$.objfun(xs, pred = pred, measure = measure), diff --git a/R/PipeOpVtreat.R b/R/PipeOpVtreat.R index 473723ebd..8a87e9ba5 100644 --- a/R/PipeOpVtreat.R +++ b/R/PipeOpVtreat.R @@ -136,7 +136,7 @@ PipeOpVtreat = R6Class("PipeOpVtreat", smFactor = p_dbl(lower = 0, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), rareCount = p_int(lower = 0L, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), rareSig = p_dbl(lower = 0, upper = 1, special_vals = list(NULL), tags = c("train", "regression", "classification", "multinomial")), # default NULL for regression, classification, 1 for multinomial - collarProb = p_dbl(lower = 0, upper = 1, default = 0, tags = c("train", "regression", "classification", "multinomial")), + collarProb = p_dbl(lower = 0, upper = 1, default = 0, tags = c("train", "regression", "classification", "multinomial"), depends = doCollar == TRUE), doCollar = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), codeRestriction = p_uty(default = NULL, custom_check = function(x) checkmate::check_character(x, any.missing = FALSE, null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), @@ -160,7 +160,6 @@ PipeOpVtreat = R6Class("PipeOpVtreat", imputation_map = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "predict")) # NOTE: parallelCluster missing intentionally and will be set to NULL ) - ps$add_dep("collarProb", on = "doCollar", cond = CondEqual$new(TRUE)) ps$values = list(recommended = TRUE, cols_to_copy = selector_none()) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "vtreat", tags = c("encode", "missings")) } diff --git a/tests/testthat/helper_test_pipeops.R b/tests/testthat/helper_test_pipeops.R index 0589a0791..5265177bb 100644 
--- a/tests/testthat/helper_test_pipeops.R +++ b/tests/testthat/helper_test_pipeops.R @@ -45,7 +45,7 @@ PipeOpDebugMulti = R6Class("PipeOpDebugMulti", p = ps(par = p_int(lower = 0, upper = 10, default = 0, tags = c("train", "predict"))) self$nin = length(inputs) self$nout = length(outputs) - super$initialize(id, p), + super$initialize(id, param_set = p, input = data.table(name = inputs, train = "*", predict = "*"), output = data.table(name = outputs, train = "*", predict = "*")) }), From 5550db846bf1df4cd1c15b50aed7d440b83e566e Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 00:51:25 +0100 Subject: [PATCH 16/46] quote and fix dependencies --- R/PipeOpNMF.R | 2 +- R/PipeOpTextVectorizer.R | 18 +++++++++--------- R/PipeOpVtreat.R | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/PipeOpNMF.R b/R/PipeOpNMF.R index aa2beb689..bb99d02b1 100644 --- a/R/PipeOpNMF.R +++ b/R/PipeOpNMF.R @@ -125,7 +125,7 @@ PipeOpNMF = R6Class("PipeOpNMF", track = p_lgl(default = FALSE, tags = c("train", "nmf.options")), verbose = p_uty(default = FALSE, tags = c("train", "nmf.options")), pbackend = p_uty(tags = c("train", "nmf")), # .pbackend - callback = p_uty(tags = c("train", "nmf"), depends = keep.all == TRUE) # .callback + callback = p_uty(tags = c("train", "nmf"), depends = quote(keep.all == TRUE)) # .callback ) ps$values = list(rank = 2L, method = "brunet", parallel = FALSE, parallel.required = FALSE) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer"), packages = c("MASS", "NMF")) diff --git a/R/PipeOpTextVectorizer.R b/R/PipeOpTextVectorizer.R index eb1531714..8550fdb22 100644 --- a/R/PipeOpTextVectorizer.R +++ b/R/PipeOpTextVectorizer.R @@ -188,7 +188,7 @@ PipeOpTextVectorizer = R6Class("PipeOpTextVectorizer", sparsity = p_dbl(lower = 0, upper = 1, default = NULL, tags = c("train", "dfm_trim"), special_vals = list(NULL), - depends = return_type == "bow"), + depends = quote(return_type == "bow")), termfreq_type = p_fct(default = "count", tags = c("train", "dfm_trim"), levels = c("count", "prop", "rank", "quantile")), min_termfreq = p_dbl(lower = 0, default = NULL, @@ -198,20 +198,20 @@ PipeOpTextVectorizer = R6Class("PipeOpTextVectorizer", scheme_df = p_fct(default = "count", tags = c("train", "docfreq"), levels = c("count", "inverse", "inversemax", "inverseprob", "unary")), - smoothing_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq"), depends = scheme_df %in% c("inverse", "inversemax", "inverseprob")), - k_df = p_dbl(lower = 0, tags = c("train", "docfreq"), depends = scheme_df %in% c("inverse", "inversemax", "inverseprob")), - threshold_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq"), depends = scheme_df == "count"), + smoothing_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq"), depends = quote(scheme_df %in% c("inverse", "inversemax", "inverseprob"))), + k_df = p_dbl(lower = 0, tags = c("train", "docfreq"), depends = quote(scheme_df %in% c("inverse", "inversemax", "inverseprob"))), + threshold_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq"), depends = quote(scheme_df == "count")), base_df = p_dbl(lower = 0, default = 10, tags = c("train", "docfreq"), - depends = scheme_df %in% c("inverse", "inversemax", "inverseprob")), + depends = quote(scheme_df %in% c("inverse", "inversemax", "inverseprob"))), - scheme_tf = p_fct(default = "count", tags = c("train", "predict", "dfm_weight", depends = return_type == "bow"), + scheme_tf = p_fct(default = "count", tags = 
c("train", "predict", "dfm_weight"), depends = quote(return_type == "bow"), levels = c("count", "prop", "propmax", "logcount", "boolean", "augmented", "logave")), - k_tf = p_dbl(lower = 0, upper = 1, tags = c("train", "predict", "dfm_weight"), depends = scheme_tf == "augmented"), - base_tf = p_dbl(lower = 0, default = 10, tags = c("train", "predict", "dfm_weight"), depends = scheme_tf %in% c("logcount", "logave")), + k_tf = p_dbl(lower = 0, upper = 1, tags = c("train", "predict", "dfm_weight"), depends = quote(scheme_tf == "augmented")), + base_tf = p_dbl(lower = 0, default = 10, tags = c("train", "predict", "dfm_weight"), depends = quote(scheme_tf %in% c("logcount", "logave"))), return_type = p_fct(levels = c("bow", "integer_sequence", "factor_sequence"), tags = c("train", "predict")), sequence_length = p_int(default = 0, lower = 0, upper = Inf, tags = c("train", "predict", "integer_sequence"), - depends = return_type %in% c("integer_sequence", "factor_sequence")) + depends = quote(return_type %in% c("integer_sequence", "factor_sequence"))) ) ps$values = list(stopwords_language = "smart", extra_stopwords = character(0), n = 1, scheme_df = "unary", return_type = "bow") diff --git a/R/PipeOpVtreat.R b/R/PipeOpVtreat.R index 8a87e9ba5..677fbc7dd 100644 --- a/R/PipeOpVtreat.R +++ b/R/PipeOpVtreat.R @@ -136,7 +136,7 @@ PipeOpVtreat = R6Class("PipeOpVtreat", smFactor = p_dbl(lower = 0, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), rareCount = p_int(lower = 0L, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), rareSig = p_dbl(lower = 0, upper = 1, special_vals = list(NULL), tags = c("train", "regression", "classification", "multinomial")), # default NULL for regression, classification, 1 for multinomial - collarProb = p_dbl(lower = 0, upper = 1, default = 0, tags = c("train", "regression", "classification", "multinomial"), depends = doCollar == TRUE), + collarProb = p_dbl(lower = 0, upper = 1, default = 0, tags = c("train", "regression", "classification", "multinomial"), depends = quote(doCollar == TRUE)), doCollar = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), codeRestriction = p_uty(default = NULL, custom_check = function(x) checkmate::check_character(x, any.missing = FALSE, null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), From 577fb89a31aad329082d65a81a3f088e9f42ebfe Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 01:29:23 +0100 Subject: [PATCH 17/46] already going quite far --- .github/workflows/r-cmd-check-paradox.yml | 4 +-- R/PipeOpEnsemble.R | 8 ++++-- tests/testthat/helper_functions.R | 32 ++++++++++++++++------- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/.github/workflows/r-cmd-check-paradox.yml b/.github/workflows/r-cmd-check-paradox.yml index 784340558..db4229696 100644 --- a/.github/workflows/r-cmd-check-paradox.yml +++ b/.github/workflows/r-cmd-check-paradox.yml @@ -4,10 +4,10 @@ on: workflow_dispatch: push: branches: - - main + - master pull_request: branches: - - main + - master name: r-cmd-check-paradox diff --git a/R/PipeOpEnsemble.R b/R/PipeOpEnsemble.R index 4a4e1f52e..6bcd45639 100644 --- a/R/PipeOpEnsemble.R +++ b/R/PipeOpEnsemble.R @@ -84,8 +84,12 @@ PipeOpEnsemble = R6Class("PipeOpEnsemble", public = list( initialize = function(innum = 0, collect_multiplicity = FALSE, id, param_set = ps(), param_vals = list(), packages = character(0), prediction_type = "Prediction", tags = 
NULL) { assert_integerish(innum, lower = 0) - param_set$add(ParamUty$new("weights", custom_check = check_weights(innum), tags = "predict")) - param_set$values$weights = 1 + if (paradox_info$is_old) { + param_set$add(ParamUty$new("weights", custom_check = check_weights(innum), tags = "predict")) + param_set$values$weights = 1 + } else { + param_set = c(param_set, ps(weights = p_uty(check_weights(innum), tags = "predict", init = 1))) + } inname = if (innum) rep_suffix("input", innum) else "..." intype = c("NULL", prediction_type) private$.collect = assert_flag(collect_multiplicity) diff --git a/tests/testthat/helper_functions.R b/tests/testthat/helper_functions.R index cec7417d9..a36fc76d9 100644 --- a/tests/testthat/helper_functions.R +++ b/tests/testthat/helper_functions.R @@ -118,15 +118,29 @@ expect_valid_pipeop_param_set = function(po, check_ps_default_values = TRUE) { ps = po$param_set expect_true(every(ps$tags, function(x) length(intersect(c("train", "predict"), x)) > 0L)) - uties = ps$params[ps$ids("ParamUty")] - if (length(uties)) { - test_value = NO_DEF # custom_checks should fail for NO_DEF - results = map(uties, function(uty) { - uty$custom_check(test_value) - }) - expect_true(all(map_lgl(results, function(result) { - length(result) == 1L && (is.character(result) || result == TRUE) # result == TRUE is necessary because default is function(x) TRUE - })), label = "custom_check returns string on failure") + if (mlr3pipelines:::paradox_info$is_old) { + uties = ps$params[ps$ids("ParamUty")] + if (length(uties)) { + test_value = NO_DEF # custom_checks should fail for NO_DEF + results = map(uties, function(uty) { + uty$custom_check(test_value) + }) + expect_true(all(map_lgl(results, function(result) { + length(result) == 1L && (is.character(result) || result == TRUE) # result == TRUE is necessary because default is function(x) TRUE + })), label = "custom_check returns string on failure") + } + } else { + uties = ps$ids("ParamUty") + if (length(uties)) { + test_value = NO_DEF # custom_checks should fail for NO_DEF + results = map(uties, function(uty) { + psn = ps$subset(uty, allow_dangling_dependencies = TRUE) + psn$check(structure(list(test_value), names = uty)) + }) + expect_true(all(map_lgl(results, function(result) { + length(result) == 1L && (is.character(result) || result == TRUE) # result == TRUE is necessary because default is function(x) TRUE + })), label = "custom_check returns string on failure") + } } if (check_ps_default_values) { From 7e504e718d7a50a7a93a3b1ee7b50cba3addf1bf Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 11:59:13 +0100 Subject: [PATCH 18/46] pacify static checker --- R/PipeOpEnsemble.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/PipeOpEnsemble.R b/R/PipeOpEnsemble.R index 6bcd45639..7be8e1aab 100644 --- a/R/PipeOpEnsemble.R +++ b/R/PipeOpEnsemble.R @@ -86,10 +86,10 @@ PipeOpEnsemble = R6Class("PipeOpEnsemble", assert_integerish(innum, lower = 0) if (paradox_info$is_old) { param_set$add(ParamUty$new("weights", custom_check = check_weights(innum), tags = "predict")) - param_set$values$weights = 1 } else { - param_set = c(param_set, ps(weights = p_uty(check_weights(innum), tags = "predict", init = 1))) + param_set = c(param_set, ps(weights = p_uty(check_weights(innum), tags = "predict"))) } + param_set$values$weights = 1 inname = if (innum) rep_suffix("input", innum) else "..." 
intype = c("NULL", prediction_type) private$.collect = assert_flag(collect_multiplicity) From 0266d3f8eb60827f57dfd7009a8f40b3ea4a2d67 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 12:00:36 +0100 Subject: [PATCH 19/46] pacify static checker II --- R/PipeOpEnsemble.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/PipeOpEnsemble.R b/R/PipeOpEnsemble.R index 7be8e1aab..df3bcab98 100644 --- a/R/PipeOpEnsemble.R +++ b/R/PipeOpEnsemble.R @@ -85,7 +85,8 @@ PipeOpEnsemble = R6Class("PipeOpEnsemble", initialize = function(innum = 0, collect_multiplicity = FALSE, id, param_set = ps(), param_vals = list(), packages = character(0), prediction_type = "Prediction", tags = NULL) { assert_integerish(innum, lower = 0) if (paradox_info$is_old) { - param_set$add(ParamUty$new("weights", custom_check = check_weights(innum), tags = "predict")) + paux = ps(weights = p_uty(check_weights(innum), tags = "predict")) + param_set$add(paux$params$weights) } else { param_set = c(param_set, ps(weights = p_uty(check_weights(innum), tags = "predict"))) } From 1777b4df9f453e49a0e3a8610bffc9b48aca5d90 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 12:05:05 +0100 Subject: [PATCH 20/46] some more new paradox interface --- R/PipeOpBranch.R | 4 ++-- R/PipeOpThreshold.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/PipeOpBranch.R b/R/PipeOpBranch.R index 719505278..25f48efef 100644 --- a/R/PipeOpBranch.R +++ b/R/PipeOpBranch.R @@ -19,9 +19,9 @@ #' * `options` :: `numeric(1)` | `character`\cr #' If `options` is an integer number, it determines the number of #' output channels / options that are created, named `output1`...`output`. The -#' `$selection` parameter will then be a [`ParamInt`]. +#' `$selection` parameter will then be an integer. #' If `options` is a `character`, it determines the names of channels directly. -#' The `$selection` parameter will then be a [`ParamFct`]. +#' The `$selection` parameter will then be factorial. #' * `id` :: `character(1)`\cr #' Identifier of resulting object, default `"branch"`. #' * `param_vals` :: named `list`\cr diff --git a/R/PipeOpThreshold.R b/R/PipeOpThreshold.R index 94e69e71a..3f9dae220 100644 --- a/R/PipeOpThreshold.R +++ b/R/PipeOpThreshold.R @@ -56,8 +56,8 @@ PipeOpThreshold = R6Class("PipeOpThreshold", inherit = PipeOp, public = list( initialize = function(id = "threshold", param_vals = list()) { - param_set = ps() - param_set$add(ParamUty$new("thresholds", custom_check = check_numeric_valid_threshold, tags = "predict")) + param_set = ps(thresholds = p_uty(custom_check = check_numeric_valid_threshold, tags = "predict")) + param_set$values$thresholds = 0.5 super$initialize(id, param_set = param_set, param_vals = param_vals, packages = character(0), input = data.table(name = "input", train = "NULL", predict = "PredictionClassif"), From 88f67abf8e0af0a4f730760f153daf356da02102 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 12:11:36 +0100 Subject: [PATCH 21/46] names() of does not work any more. 
--- tests/testthat/helper_functions.R | 2 +- tests/testthat/test_GraphLearner.R | 2 +- tests/testthat/test_multiplicities.R | 4 +--- tests/testthat/test_parvals.R | 2 +- tests/testthat/test_pipeop_filter.R | 2 +- tests/testthat/test_pipeop_learnercv.R | 2 +- 6 files changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/testthat/helper_functions.R b/tests/testthat/helper_functions.R index a36fc76d9..5eb4006b8 100644 --- a/tests/testthat/helper_functions.R +++ b/tests/testthat/helper_functions.R @@ -308,7 +308,7 @@ expect_datapreproc_pipeop_class = function(poclass, constargs = list(), task, expect_task(po$predict(list(emptytask))[[1]]) expect_equal(emptytaskfnames, po$predict(list(emptytask))[[1]]$feature_names) - if ("affect_columns" %in% names(po$param_set$params) && affect_context_independent) { + if ("affect_columns" %in% po$param_set$ids() && affect_context_independent) { selector = function(data) data$feature_names[-1] po2$param_set$values$affect_columns = selector trained.subset = po$train(list(task2))[[1]] diff --git a/tests/testthat/test_GraphLearner.R b/tests/testthat/test_GraphLearner.R index fc46b6b79..7125e31f0 100644 --- a/tests/testthat/test_GraphLearner.R +++ b/tests/testthat/test_GraphLearner.R @@ -123,7 +123,7 @@ test_that("graphlearner parameters behave as they should", { dbgr = PipeOpScale$new() %>>% PipeOpLearner$new(dblrn) - expect_subset(c("scale.center", "scale.scale", "classif.debug.x"), names(dbgr$param_set$params)) + expect_subset(c("scale.center", "scale.scale", "classif.debug.x"), dbgr$param_set$ids()) dbgr$param_set$values$classif.debug.x = 1 diff --git a/tests/testthat/test_multiplicities.R b/tests/testthat/test_multiplicities.R index e68c04bc2..a938ce1ca 100644 --- a/tests/testthat/test_multiplicities.R +++ b/tests/testthat/test_multiplicities.R @@ -64,9 +64,7 @@ test_that("PipeOp - evaluate_multiplicities", { public = list( initialize = function(num, id = "multiplicities", param_vals = list()) { assert_int(num, lower = 1L) - ps = ParamSet$new(params = list( - ParamUty$new("state", tags = "train") - )) + ps = ps(state = p_uty(tags = "train")) super$initialize(id, param_set = ps, param_vals = param_vals, input = data.table(name = rep_suffix("input", num), train = "*", predict = "*"), output = data.table(name = rep_suffix("output", num), train = "*", predict = "*"), diff --git a/tests/testthat/test_parvals.R b/tests/testthat/test_parvals.R index 6e6ed6f45..c4c3fa115 100644 --- a/tests/testthat/test_parvals.R +++ b/tests/testthat/test_parvals.R @@ -25,7 +25,7 @@ test_that("graph param vals", { expect_equal(gr$pipeops$pca$param_set$values$center, TRUE) expect_equal(gr$param_set$values$pca.center, TRUE) - expect_set_equal(names(gr$param_set$params), + expect_set_equal(gr$param_set$ids(), c("scale.center", "scale.scale" ,"scale.robust", "scale.affect_columns", "pca.center", "pca.scale.", "pca.rank.", "pca.affect_columns")) expect_error({ diff --git a/tests/testthat/test_pipeop_filter.R b/tests/testthat/test_pipeop_filter.R index fda806a33..a02239fc5 100644 --- a/tests/testthat/test_pipeop_filter.R +++ b/tests/testthat/test_pipeop_filter.R @@ -50,7 +50,7 @@ test_that("PipeOpFilter parameters", { po = PipeOpFilter$new(mlr3filters::FilterVariance$new()) expect_set_equal(c("filter.nfeat", "filter.frac", "filter.cutoff", "filter.permuted"), - grep("^filter\\.", names(po$param_set$params), value = TRUE)) + grep("^filter\\.", po$param_set$ids(), value = TRUE)) po = po$clone(deep = TRUE) # cloning often breaks param connection diff --git 
a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R index f80087ac5..bd369987a 100644 --- a/tests/testthat/test_pipeop_learnercv.R +++ b/tests/testthat/test_pipeop_learnercv.R @@ -41,7 +41,7 @@ test_that("PipeOpLearnerCV - basic properties", { test_that("PipeOpLearnerCV - param values", { lrn = mlr_learners$get("classif.rpart") polrn = PipeOpLearnerCV$new(lrn) - expect_subset(c("minsplit", "resampling.method", "resampling.folds"), names(polrn$param_set$params)) + expect_subset(c("minsplit", "resampling.method", "resampling.folds"), polrn$param_set$ids()) expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, xval = 0)) polrn$param_set$values$minsplit = 2 expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, minsplit = 2, xval = 0)) From 83ec565c76b5a43f8cf37dd4ac93d16e05654688 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 12:22:03 +0100 Subject: [PATCH 22/46] make a few more tests pass --- tests/testthat/test_pipeop_impute.R | 12 ++++++------ tests/testthat/test_pipeop_learner.R | 12 ++++++++++-- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test_pipeop_impute.R b/tests/testthat/test_pipeop_impute.R index 0793bf321..9e66ea8fe 100644 --- a/tests/testthat/test_pipeop_impute.R +++ b/tests/testthat/test_pipeop_impute.R @@ -9,12 +9,12 @@ test_that("PipeOpImpute", { PipeOpTestImpute = R6Class("PipeOpTestImpute", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "impute", param_vals = list()) { - ps = ParamSet$new(list( - ParamFct$new("method_num", levels = c("median", "mean", "mode", "sample", "hist", "oor", "constant"), tags = c("train", "predict")), - ParamFct$new("method_fct", levels = c("oor", "sample", "mode", "constant"), tags = c("train", "predict")), - ParamFct$new("add_dummy", levels = c("none", "missing_train", "all"), tags = c("train", "predict")), - ParamUty$new("innum", tags = c("train", "predict")) - )) + ps = ps( + method_num = p_fct(c("median", "mean", "mode", "sample", "hist", "oor", "constant"), tags = c("train", "predict")), + method_fct = p_fct(c("oor", "sample", "mode", "constant"), tags = c("train", "predict")), + add_dummy = p_fct(c("none", "missing_train", "all"), tags = c("train", "predict")), + innum = p_uty(tags = c("train", "predict")) + ) ps$values = list(method_num = "median", method_fct = "oor", add_dummy = "missing_train") super$initialize(id, ps, param_vals = param_vals) }, diff --git a/tests/testthat/test_pipeop_learner.R b/tests/testthat/test_pipeop_learner.R index 164d4a2aa..223a5292d 100644 --- a/tests/testthat/test_pipeop_learner.R +++ b/tests/testthat/test_pipeop_learner.R @@ -33,9 +33,17 @@ test_that("PipeOpLearner - param_set and values", { }) po$param_set$values$minsplit = 2L expect_equal(po$param_set$values, po$learner$param_set$values) - expect_equal(po$param_set$values, list(xval = 0L, minsplit = 2L)) + + sortnames = function(x) { + if (!is.null(names(x))) { + x <- x[order(names(x), decreasing = TRUE)] + } + x + } + + expect_equal(sortnames(po$param_set$values), list(xval = 0L, minsplit = 2L)) po$param_set$values$maxdepth = 1L - expect_equal(po$param_set$values, list(xval = 0L, minsplit = 2L, maxdepth = 1L)) + expect_equal(sortnames(po$param_set$values), list(xval = 0L, minsplit = 2L, maxdepth = 1L)) po$param_set$values = list(minsplit = 1L) expect_equal(po$param_set$values, list(minsplit = 1L)) expect_error({ 
From b7ed8c29041885454f6054aa2459ebc4c74ed629 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 12:40:53 +0100 Subject: [PATCH 23/46] document --- man/PipeOp.Rd | 2 +- man/PipeOpEnsemble.Rd | 2 +- man/PipeOpImpute.Rd | 2 +- man/PipeOpTargetTrafo.Rd | 2 +- man/PipeOpTaskPreproc.Rd | 2 +- man/PipeOpTaskPreprocSimple.Rd | 2 +- man/mlr_pipeops_branch.Rd | 4 ++-- man/mlr_pipeops_updatetarget.Rd | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index 4292943b1..a9f533d9d 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -39,7 +39,7 @@ is not intended to be instantiated. \section{Construction}{ -\if{html}{\out{
<div class="sourceCode">}}\preformatted{PipeOp$new(id, param_set = ParamSet$new(), param_vals = list(), input, output, packages = character(0), tags = character(0)) +\if{html}{\out{<div class="sourceCode">
}}\preformatted{PipeOp$new(id, param_set = ps(), param_vals = list(), input, output, packages = character(0), tags = character(0)) }\if{html}{\out{</div>
}} \itemize{ \item \code{id} :: \code{character(1)}\cr diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd index 16e3204a6..e723c06c1 100644 --- a/man/PipeOpEnsemble.Rd +++ b/man/PipeOpEnsemble.Rd @@ -14,7 +14,7 @@ for a \code{PipeOp} and requires deriving classes to create the \code{private$we Note: This object is typically constructed via a derived class, e.g. \code{\link{PipeOpClassifAvg}} or \code{\link{PipeOpRegrAvg}}. -\if{html}{\out{
<div class="sourceCode">}}\preformatted{PipeOpEnsemble$new(innum = 0, collect_multiplicity = FALSE, id, param_set = ParamSet$new(), param_vals = list(), packages = character(0), prediction_type = "Prediction") +\if{html}{\out{<div class="sourceCode">
}}\preformatted{PipeOpEnsemble$new(innum = 0, collect_multiplicity = FALSE, id, param_set = ps(), param_vals = list(), packages = character(0), prediction_type = "Prediction") }\if{html}{\out{</div>
}} \itemize{ \item \code{innum} :: \code{numeric(1)}\cr diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd index 42112caab..86ac7e237 100644 --- a/man/PipeOpImpute.Rd +++ b/man/PipeOpImpute.Rd @@ -12,7 +12,7 @@ Abstract base class for feature imputation. \section{Construction}{ -\if{html}{\out{
<div class="sourceCode">}}\preformatted{PipeOpImpute$$new(id, param_set = ParamSet$new(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task") +\if{html}{\out{<div class="sourceCode">
}}\preformatted{PipeOpImpute$$new(id, param_set = ps(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task") }\if{html}{\out{</div>
}} \itemize{ \item \code{id} :: \code{character(1)}\cr diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd index 88491fb40..4e7a48399 100644 --- a/man/PipeOpTargetTrafo.Rd +++ b/man/PipeOpTargetTrafo.Rd @@ -19,7 +19,7 @@ Users can overload up to four \verb{private$}-functions: \code{.get_state()} (op \section{Construction}{ -\if{html}{\out{
<div class="sourceCode">}}\preformatted{PipeOpTargetTrafo$new(id, param_set = ParamSet$new(), param_vals = list() packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) +\if{html}{\out{<div class="sourceCode">
}}\preformatted{PipeOpTargetTrafo$new(id, param_set = ps(), param_vals = list() packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) }\if{html}{\out{</div>
}} \itemize{ \item \code{id} :: \code{character(1)}\cr diff --git a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd index 0930a021c..6cceee33e 100644 --- a/man/PipeOpTaskPreproc.Rd +++ b/man/PipeOpTaskPreproc.Rd @@ -40,7 +40,7 @@ the \code{\link{PipeOpTaskPreprocSimple}} class can be used instead. \section{Construction}{ -\if{html}{\out{
<div class="sourceCode">}}\preformatted{PipeOpTaskPreproc$new(id, param_set = ParamSet$new(), param_vals = list(), can_subset_cols = TRUE, +\if{html}{\out{<div class="sourceCode">
}}\preformatted{PipeOpTaskPreproc$new(id, param_set = ps(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task", tags = NULL, feature_types = mlr_reflections$task_feature_types) }\if{html}{\out{</div>
}} \itemize{ diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd index 07ec76149..7b85fde32 100644 --- a/man/PipeOpTaskPreprocSimple.Rd +++ b/man/PipeOpTaskPreprocSimple.Rd @@ -26,7 +26,7 @@ This inherits from \code{\link{PipeOpTaskPreproc}} and behaves essentially the s \section{Construction}{ -\if{html}{\out{
<div class="sourceCode">}}\preformatted{PipeOpTaskPreprocSimple$new(id, param_set = ParamSet$new(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task") +\if{html}{\out{<div class="sourceCode">
}}\preformatted{PipeOpTaskPreprocSimple$new(id, param_set = ps(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task") }\if{html}{\out{</div>
}} (Construction is identical to \code{\link{PipeOpTaskPreproc}}.) diff --git a/man/mlr_pipeops_branch.Rd b/man/mlr_pipeops_branch.Rd index 273cca0fc..a53e08339 100644 --- a/man/mlr_pipeops_branch.Rd +++ b/man/mlr_pipeops_branch.Rd @@ -24,9 +24,9 @@ Not to be confused with \code{\link{PipeOpCopy}}, the naming scheme is a bit unf \item \code{options} :: \code{numeric(1)} | \code{character}\cr If \code{options} is an integer number, it determines the number of output channels / options that are created, named \code{output1}...\verb{output}. The -\verb{$selection} parameter will then be a \code{\link{ParamInt}}. +\verb{$selection} parameter will then be an integer. If \code{options} is a \code{character}, it determines the names of channels directly. -The \verb{$selection} parameter will then be a \code{\link{ParamFct}}. +The \verb{$selection} parameter will then be factorial. \item \code{id} :: \code{character(1)}\cr Identifier of resulting object, default \code{"branch"}. \item \code{param_vals} :: named \code{list}\cr diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd index ade488853..d14b91555 100644 --- a/man/mlr_pipeops_updatetarget.Rd +++ b/man/mlr_pipeops_updatetarget.Rd @@ -27,7 +27,7 @@ name of the data of the input \code{\link[mlr3:Task]{Task}}, this column is set \section{Construction}{ -\if{html}{\out{
<div class="sourceCode">}}\preformatted{PipeOpUpdateTarget$new(id, param_set = ParamSet$new(), +\if{html}{\out{<div class="sourceCode">
}}\preformatted{PipeOpUpdateTarget$new(id, param_set = ps(), param_vals = list(), packages = character(0)) }\if{html}{\out{</div>
}} \itemize{ From bdd65b45ec3691edd5d770813d0f58398d88c524 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 12:41:03 +0100 Subject: [PATCH 24/46] check for 'old' in a different way --- R/zzz.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/zzz.R b/R/zzz.R index 218cc0548..cf50d0dfe 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -29,7 +29,7 @@ paradox_info <- list2env(list(is_old = FALSE), parent = emptyenv()) if (Sys.getenv("IN_PKGDOWN") == "true") { lg$set_threshold("warn") } - paradox_info$is_old = !is.null(ps()$set_id) + paradox_info$is_old = "set_id" %in% names(ps()) } # nocov end .onUnload = function(libpath) { # nocov start From 42db529997180178632b2de3a59a1254d798d050 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 12:41:11 +0100 Subject: [PATCH 25/46] use compatible bbotk --- .github/workflows/r-cmd-check-paradox.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/r-cmd-check-paradox.yml b/.github/workflows/r-cmd-check-paradox.yml index db4229696..ae0bd2252 100644 --- a/.github/workflows/r-cmd-check-paradox.yml +++ b/.github/workflows/r-cmd-check-paradox.yml @@ -31,7 +31,7 @@ jobs: - uses: actions/checkout@v3 - name: paradox - run: 'echo -e "Remotes:\n mlr-org/paradox@s3_params,\n mlr-org/mlr3learners@s3params_compat,\n mlr-org/mlr3filters@s3params_compat" >> DESCRIPTION' + run: 'echo -e "Remotes:\n mlr-org/paradox@s3_params,\n mlr-org/mlr3learners@s3params_compat,\n mlr-org/mlr3filters@s3params_compat,\n mlr-org/bbotk@s3params_compat" >> DESCRIPTION' - uses: r-lib/actions/setup-r@v2 with: From 50511397c1c19ca474f6398b379827a36add8adb Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 13:09:26 +0100 Subject: [PATCH 26/46] test_dictionary with new paramset --- tests/testthat/test_dictionary.R | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/testthat/test_dictionary.R b/tests/testthat/test_dictionary.R index e3ae20d97..1a598668c 100644 --- a/tests/testthat/test_dictionary.R +++ b/tests/testthat/test_dictionary.R @@ -117,31 +117,37 @@ test_that("Dictionary contains all PipeOps", { expect_equal(other_obj$phash, test_obj$phash, info = paste(dictname, "$new id test 2")) expect_equal(inflate(do.call(pogen$new, args)), test_obj, info = dictname) + + tops = test_obj$param_set # we now check if hyperparameters can be changed through construction # we do this by automatically generating a hyperparameter value that deviates from the automatically constructed one. # However, for ParamUty we can't do that, so if there are only 'ParamUty' parameter we skip this part. 
- eligibleparams = test_obj$param_set$params[test_obj$param_set$class != "ParamUty"] - eligibleparams = discard(eligibleparams, function(p) { - # filter out discrete params with only one level, or the numeric parameters with $lower == $upper - # The use '&&' here is intentional, because numeric parameters have 0 levels, and discrete parameters have $lower == $upper (== NA) - length(p$levels) < 2 && isTRUE(all.equal(p$lower, p$upper)) - }) + eligibleparams = which( + tops$class != "ParamUty" & + # filter out discrete params with only one level, or the numeric parameters with $lower == $upper + # Note that numeric parameters have 0 levels, and discrete parameters have $lower == $upper (== NA) + ( + (!is.na(tops$lower) & tops$lower != tops$upper) | + (is.finite(tops$nlevels) & tops$nlevels > 1) + ) + ) if (length(eligibleparams)) { - testingparam = eligibleparams[[1]] + testingparam = tops$ids()[[eligibleparams[[1]]]] # we want to construct an object where the parameter value is *different* from the value it gets on construction by default. # For this we take a few candidate values and `setdiff` the original value - origval = as.atomic(test_obj$param_set$values[[testingparam$id]]) - if (testingparam$class %in% c("ParamLgl", "ParamFct")) { - candidates = testingparam$levels + origval = as.atomic(test_obj$param_set$values[[testingparam]]) + if (tops$class[[testingparam]] %in% c("ParamLgl", "ParamFct")) { + candidates = tops$levels[[testingparam]] } else { - candidates = Filter(function(x) is.finite(x) && !is.na(x), c(testingparam$lower, testingparam$upper, testingparam$lower + 1, 0, origval + 1)) + candidates = Filter(function(x) is.finite(x) && !is.na(x), + c(tops$lower[[testingparam]], tops$upper[[testingparam]], tops$lower[[testingparam]] + 1, 0, origval + 1)) } val = setdiff(candidates, origval)[1] # construct the `param_vals = list(PARNAME = PARVAL)` construction argument args$param_vals = list(val) - names(args$param_vals) = testingparam$id + names(args$param_vals) = testingparam # check that the constructed object is different from the test_obj, but setting the test_obj's parameter # makes them equal again. @@ -152,7 +158,7 @@ test_that("Dictionary contains all PipeOps", { # phash should be independent of this! 
expect_true(isTRUE(all.equal(dict_constructed$phash, test_obj$phash)), dictname) - test_obj$param_set$values[[testingparam$id]] = val + test_obj$param_set$values[[testingparam]] = val expect_equal(touch(dict_constructed), test_obj) expect_equal(inflate(touch(gen_constructed)), test_obj) From 3d3e489d6a172dea72f9ddc74d9140135c5cb53d Mon Sep 17 00:00:00 2001 From: mb706 Date: Sun, 14 Jan 2024 13:35:40 +0100 Subject: [PATCH 27/46] don't use broken github mlr3filters --- .github/workflows/r-cmd-check-paradox.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/r-cmd-check-paradox.yml b/.github/workflows/r-cmd-check-paradox.yml index ae0bd2252..61ddf0286 100644 --- a/.github/workflows/r-cmd-check-paradox.yml +++ b/.github/workflows/r-cmd-check-paradox.yml @@ -31,7 +31,7 @@ jobs: - uses: actions/checkout@v3 - name: paradox - run: 'echo -e "Remotes:\n mlr-org/paradox@s3_params,\n mlr-org/mlr3learners@s3params_compat,\n mlr-org/mlr3filters@s3params_compat,\n mlr-org/bbotk@s3params_compat" >> DESCRIPTION' + run: 'echo -e "Remotes:\n mlr-org/paradox@s3_params,\n mlr-org/mlr3learners@s3params_compat,\n mlr-org/bbotk@s3params_compat" >> DESCRIPTION' - uses: r-lib/actions/setup-r@v2 with: From 66d605c41faa3cfa12837491033cbf4a51a61bc8 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 20 Jan 2024 19:02:21 +0100 Subject: [PATCH 28/46] more checks --- .../r-cmd-check-old-paradox-new-other.yml | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/workflows/r-cmd-check-old-paradox-new-other.yml diff --git a/.github/workflows/r-cmd-check-old-paradox-new-other.yml b/.github/workflows/r-cmd-check-old-paradox-new-other.yml new file mode 100644 index 000000000..4713deb6c --- /dev/null +++ b/.github/workflows/r-cmd-check-old-paradox-new-other.yml @@ -0,0 +1,44 @@ +# r cmd check workflow of the mlr3 ecosystem v0.1.0 +# https://github.com/mlr-org/actions +on: + workflow_dispatch: + push: + branches: + - master + pull_request: + branches: + - master + +name: r-cmd-check-paradox + +jobs: + r-cmd-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'devel'} + - {os: ubuntu-latest, r: 'release'} + + steps: + - uses: actions/checkout@v3 + + - name: paradox + run: 'echo -e "Remotes:\n mlr-org/mlr3learners@s3params_compat,\n mlr-org/bbotk@s3params_compat" >> DESCRIPTION' + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + - uses: r-lib/actions/check-r-package@v2 From 13e00e4efeb924fc00681b554e089da8ea7fd2f6 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Tue, 23 Jan 2024 17:45:36 +0100 Subject: [PATCH 29/46] revert keep_results --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 5 ++--- R/Graph.R | 5 ++--- R/PipeOp.R | 2 +- man/Graph.Rd | 3 +-- man/PipeOp.Rd | 2 +- man/mlr_pipeops_nmf.Rd | 2 +- tests/testthat/test_Graph.R | 11 ----------- 9 files changed, 10 insertions(+), 23 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5e6b8a1ef..10fb81ecd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -93,7 +93,7 @@ Config/testthat/edition: 3 Config/testthat/parallel: true NeedsCompilation: no Roxygen: list(markdown = TRUE, r6 = FALSE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.2.3.9000 VignetteBuilder: knitr Collate: 'Graph.R' diff --git a/NAMESPACE 
b/NAMESPACE index a40fd7b44..c0b0975d9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -148,6 +148,7 @@ import(mlr3) import(mlr3misc) import(paradox) importFrom(R6,R6Class) +importFrom(data.table,as.data.table) importFrom(digest,digest) importFrom(stats,setNames) importFrom(utils,bibentry) diff --git a/NEWS.md b/NEWS.md index 940986bc2..f2dbc84a6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,9 @@ # mlr3pipelines 0.5.0-9000 -* Feature: The `Graph`'s `keep_results` can now also be a character vector -containing the IDs of the `PipeOp`s whose results are being stored. * Feature: The `$add_pipeop()` method got an argument `clone` that is `TRUE` by default (previously `PipeOp`s were always cloned) -* Bugfix: `PipeOpFeatureUnion` in some rare cases dropped variables called `"x"` +* Bugfix: `PipeOpFeatureUnion` in some rare cases dropped variables called +`"x"`, this is no longer the case # mlr3pipelines 0.5.0-1 diff --git a/R/Graph.R b/R/Graph.R index f4fc796ad..7df319808 100644 --- a/R/Graph.R +++ b/R/Graph.R @@ -59,9 +59,8 @@ #' * `phash` :: `character(1)` \cr #' Stores a checksum calculated on the [`Graph`] configuration, which includes all [`PipeOp`] hashes #' *except* their `$param_set$values`, and a hash of `$edges`. -#' * `keep_results` :: `logical(1)` or `character()` \cr +#' * `keep_results` :: `logical(1)`\cr #' Whether to store intermediate results in the [`PipeOp`]'s `$.result` slot, mostly for debugging purposes. Default `FALSE`. -#' Can also be a character vector of IDs, in which case only the results of the selected `PipeOp`s are stored. #' * `man` :: `character(1)`\cr #' Identifying string of the help page that shows with `help()`. #' @@ -644,7 +643,7 @@ graph_reduce = function(self, input, fun, single_input) { lg$debug("Running PipeOp '%s$%s()'", id, fun, pipeop = op, input = input) output = op[[fun]](input) - if (isTRUE(self$keep_results) || op$id %in% self$keep_results) { + if (self$keep_results) { op$.result = output } diff --git a/R/PipeOp.R b/R/PipeOp.R index c9b393e70..24a12ab6c 100644 --- a/R/PipeOp.R +++ b/R/PipeOp.R @@ -130,7 +130,7 @@ #' [`PipeOp`]'s functionality may change depending on more than these values, it should inherit the `$hash` active #' binding and calculate the hash as `digest(list(super$hash, ), algo = "xxhash64")`. #' * `.result` :: `list` \cr -#' If the [`Graph`]'s `$keep_results` flag is set to `TRUE` or contains the ID of this `PipeOp`, then the intermediate Results of `$train()` and `$predict()` +#' If the [`Graph`]'s `$keep_results` flag is set to `TRUE`, then the intermediate Results of `$train()` and `$predict()` #' are saved to this slot, exactly as they are returned by these functions. This is mainly for debugging purposes #' and done, if requested, by the [`Graph`] backend itself; it should *not* be done explicitly by `private$.train()` or `private$.predict()`. #' * `man` :: `character(1)`\cr diff --git a/man/Graph.Rd b/man/Graph.Rd index 0db8e24a6..ab76b20c8 100644 --- a/man/Graph.Rd +++ b/man/Graph.Rd @@ -69,9 +69,8 @@ Stores a checksum calculated on the \code{\link{Graph}} configuration, which inc \item \code{phash} :: \code{character(1)} \cr Stores a checksum calculated on the \code{\link{Graph}} configuration, which includes all \code{\link{PipeOp}} hashes \emph{except} their \verb{$param_set$values}, and a hash of \verb{$edges}. 
-\item \code{keep_results} :: \code{logical(1)} or \code{character()} \cr +\item \code{keep_results} :: \code{logical(1)}\cr Whether to store intermediate results in the \code{\link{PipeOp}}'s \verb{$.result} slot, mostly for debugging purposes. Default \code{FALSE}. -Can also be a character vector of IDs, in which case only the results of the selected \code{PipeOp}s are stored. \item \code{man} :: \code{character(1)}\cr Identifying string of the help page that shows with \code{help()}. } diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index 553ca0301..4292943b1 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -137,7 +137,7 @@ Checksum calculated on the \code{\link{PipeOp}}, depending on the \code{\link{Pi \code{\link{PipeOp}}'s functionality may change depending on more than these values, it should inherit the \verb{$hash} active binding and calculate the hash as \verb{digest(list(super$hash, ), algo = "xxhash64")}. \item \code{.result} :: \code{list} \cr -If the \code{\link{Graph}}'s \verb{$keep_results} flag is set to \code{TRUE} or contains the ID of this \code{PipeOp}, then the intermediate Results of \verb{$train()} and \verb{$predict()} +If the \code{\link{Graph}}'s \verb{$keep_results} flag is set to \code{TRUE}, then the intermediate Results of \verb{$train()} and \verb{$predict()} are saved to this slot, exactly as they are returned by these functions. This is mainly for debugging purposes and done, if requested, by the \code{\link{Graph}} backend itself; it should \emph{not} be done explicitly by \code{private$.train()} or \code{private$.predict()}. \item \code{man} :: \code{character(1)}\cr diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index 3c8a75c9a..5e967fab2 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -96,7 +96,7 @@ See \code{\link[NMF:nmf]{nmf()}}. \section{Internals}{ -Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis]{basis()}}, \code{\link[NMF:coef]{coef()}} and +Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis-coef-methods]{basis()}}, \code{\link[NMF:basis-coef-methods]{coef()}} and \code{\link[MASS:ginv]{ginv()}}. 
} diff --git a/tests/testthat/test_Graph.R b/tests/testthat/test_Graph.R index af40c8f15..ed6f900d8 100644 --- a/tests/testthat/test_Graph.R +++ b/tests/testthat/test_Graph.R @@ -501,14 +501,3 @@ test_that("Same output into multiple channels does not cause a bug", { expect_true(res$po3.output1 == 2) expect_true(res$po4.output1 == 2) }) - -test_that("keep_results can be a character vector", { - graph = po("pca") %>>% po("ica") - - graph$keep_results = "pca" - - graph$train(tsk("iris")) - - expect_true(is.null(graph$pipeops$ica$.result)) - expect_class(graph$pipeops$pca$.result[[1L]], "Task") -}) From c75b598aa6f9176e254ed6b289d0597416795441 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Sat, 27 Jan 2024 18:49:21 +0100 Subject: [PATCH 30/46] change default for backwards compatability --- R/assert_graph.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/assert_graph.R b/R/assert_graph.R index 828f4b4fc..fadaa22b7 100644 --- a/R/assert_graph.R +++ b/R/assert_graph.R @@ -39,7 +39,9 @@ as_graph = function(x, clone = FALSE) { } #' @export -as_graph.default = function(x, clone = FALSE) { +as_graph.default = function(x, clone = TRUE) { + # different default than other methods for backwards compatibility + # previously $add_pipeop() always cloned its input Graph$new()$add_pipeop(x, clone = clone) } From 22ae608b9112ced8ff5f933d3599f5f0c7a710eb Mon Sep 17 00:00:00 2001 From: Maximilian Muecke Date: Wed, 31 Jan 2024 15:03:52 +0100 Subject: [PATCH 31/46] ci: add dependabot for GHA --- .github/dependabot.yml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..5ace4600a --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" From e7b3e97ec7100e279f311db9e6b0abd126559a4a Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 28 Feb 2024 12:51:19 +0100 Subject: [PATCH 32/46] trigger actions From 9c60738134b753967b8c5cff3ade97f9bfb83575 Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 28 Feb 2024 15:26:44 +0100 Subject: [PATCH 33/46] dev cmd check with paradox master (let's see if this works) --- ...aradox-new-other.yml => dev-cmd-check.yml} | 21 ++++----- .github/workflows/r-cmd-check-paradox.yml | 44 ------------------- 2 files changed, 11 insertions(+), 54 deletions(-) rename .github/workflows/{r-cmd-check-old-paradox-new-other.yml => dev-cmd-check.yml} (60%) delete mode 100644 .github/workflows/r-cmd-check-paradox.yml diff --git a/.github/workflows/r-cmd-check-old-paradox-new-other.yml b/.github/workflows/dev-cmd-check.yml similarity index 60% rename from .github/workflows/r-cmd-check-old-paradox-new-other.yml rename to .github/workflows/dev-cmd-check.yml index 4713deb6c..b61ffa4bb 100644 --- a/.github/workflows/r-cmd-check-old-paradox-new-other.yml +++ b/.github/workflows/dev-cmd-check.yml @@ -1,21 +1,21 @@ -# r cmd check workflow of the mlr3 ecosystem v0.1.0 +# dev cmd check workflow of the mlr3 ecosystem v0.1.0 # https://github.com/mlr-org/actions on: workflow_dispatch: push: branches: - - master + - master pull_request: branches: - master -name: r-cmd-check-paradox +name: dev-check jobs: - r-cmd-check: + check-package: runs-on: ${{ matrix.config.os }} - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + name: ${{ matrix.config.dev-package }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -24,15 +24,11 @@ jobs: fail-fast: false 
matrix: config: - - {os: ubuntu-latest, r: 'devel'} - - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'release', dev-package: "mlr-org/bbotk', 'mlr-org/mlr3learners', 'mlr-org/paradox"} steps: - uses: actions/checkout@v3 - - name: paradox - run: 'echo -e "Remotes:\n mlr-org/mlr3learners@s3params_compat,\n mlr-org/bbotk@s3params_compat" >> DESCRIPTION' - - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} @@ -41,4 +37,9 @@ jobs: with: extra-packages: any::rcmdcheck needs: check + + - name: Install dev versions + run: pak::pkg_install(c('${{ matrix.config.dev-package }}')) + shell: Rscript {0} + - uses: r-lib/actions/check-r-package@v2 diff --git a/.github/workflows/r-cmd-check-paradox.yml b/.github/workflows/r-cmd-check-paradox.yml deleted file mode 100644 index 61ddf0286..000000000 --- a/.github/workflows/r-cmd-check-paradox.yml +++ /dev/null @@ -1,44 +0,0 @@ -# r cmd check workflow of the mlr3 ecosystem v0.1.0 -# https://github.com/mlr-org/actions -on: - workflow_dispatch: - push: - branches: - - master - pull_request: - branches: - - master - -name: r-cmd-check-paradox - -jobs: - r-cmd-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - strategy: - fail-fast: false - matrix: - config: - - {os: ubuntu-latest, r: 'devel'} - - {os: ubuntu-latest, r: 'release'} - - steps: - - uses: actions/checkout@v3 - - - name: paradox - run: 'echo -e "Remotes:\n mlr-org/paradox@s3_params,\n mlr-org/mlr3learners@s3params_compat,\n mlr-org/bbotk@s3params_compat" >> DESCRIPTION' - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::rcmdcheck - needs: check - - uses: r-lib/actions/check-r-package@v2 From c52d7e184e6631eae0d9d7cf25b97fbf7ca65bcc Mon Sep 17 00:00:00 2001 From: mb706 Date: Wed, 28 Feb 2024 16:02:54 +0100 Subject: [PATCH 34/46] news update --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 2eac208ae..0aaad50dc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # mlr3pipelines 0.5.0-9000 +* Compatibility with upcoming paradox release # mlr3pipelines 0.5.0-2 From 99ee083a1b340e0e159a80fa2c05f63916200953 Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 26 Mar 2024 15:43:51 +0100 Subject: [PATCH 35/46] doc --- R/pipeline_bagging.R | 1 - man/mlr_graphs_bagging.Rd | 1 - 2 files changed, 2 deletions(-) diff --git a/R/pipeline_bagging.R b/R/pipeline_bagging.R index 2ffd03d7a..31b743d32 100644 --- a/R/pipeline_bagging.R +++ b/R/pipeline_bagging.R @@ -42,7 +42,6 @@ #' resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() #' #' # The original bagging method uses boosting by sampling with replacement. -#' # This may give better performance but is also slower. #' gr = ppl("bagging", lrn_po, frac = 1, replace = TRUE, #' averager = po("regravg", collect_multiplicity = TRUE)) #' resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() diff --git a/man/mlr_graphs_bagging.Rd b/man/mlr_graphs_bagging.Rd index 186ac71ff..58d5e1e83 100644 --- a/man/mlr_graphs_bagging.Rd +++ b/man/mlr_graphs_bagging.Rd @@ -62,7 +62,6 @@ gr = pipeline_bagging(lrn_po, 3, averager = po("regravg", collect_multiplicity = resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() # The original bagging method uses boosting by sampling with replacement. -# This may give better performance but is also slower. 
gr = ppl("bagging", lrn_po, frac = 1, replace = TRUE, averager = po("regravg", collect_multiplicity = TRUE)) resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() From 0d8a29d31ae5fc5d945eaf12527444835287421d Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 26 Mar 2024 16:15:39 +0100 Subject: [PATCH 36/46] tests --- tests/testthat/test_mlr_graphs_bagging.R | 33 ++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/testthat/test_mlr_graphs_bagging.R b/tests/testthat/test_mlr_graphs_bagging.R index a5dc1067c..b7deb981a 100644 --- a/tests/testthat/test_mlr_graphs_bagging.R +++ b/tests/testthat/test_mlr_graphs_bagging.R @@ -39,3 +39,36 @@ test_that("Bagging Pipeline", { expect_true(all(map_lgl(predict_out, function(x) "PredictionClassif" %in% class(x)))) }) +test_that("Bagging with replacement", { + tsk = tsk("iris") + lrn = lrn("classif.rpart") + p = ppl("bagging", graph = po(lrn), replace = TRUE, averager = po("classifavg", collect_multiplicity = TRUE)) + expect_graph(p) + res = resample(tsk, GraphLearner$new(p), rsmp("holdout")) + expect_resample_result(res) + + tsk$filter(1:140) + expect_equal(anyDuplicated(tsk$data()), 0) # make sure no duplicates + + p = ppl("bagging", iterations = 2, + graph = lrn("classif.debug", save_tasks = TRUE), + replace = TRUE, averager = po("classifavg", collect_multiplicity = TRUE) + ) + p$train(tsk) + + expect_true(anyDuplicated(p$pipeops$classif.debug$state[[1]]$model$task_train$data()) != 0) + + getOrigId = function(data) { + tsk$data()[, origline := .I][data, on = colnames(tsk$data()), origline] + } + orig_id_1 = getOrigId(p$pipeops$classif.debug$state[[1]]$model$task_train$data()) + orig_id_2 = getOrigId(p$pipeops$classif.debug$state[[2]]$model$task_train$data()) + + expect_equal(length(orig_id_1), 140) + expect_equal(length(orig_id_2), 140) + # if we sampled the same values twice, the all.equal() would just give TRUE + expect_string(all.equal(orig_id_1, orig_id_2)) + + expect_true(length(unique(orig_id_1)) < 140) + expect_true(length(unique(orig_id_2)) < 140) +}) From 1bcf1e403e51654a90bdd3c41a5dd1088ea90ef9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Mar 2024 15:17:04 +0000 Subject: [PATCH 37/46] Bump actions/checkout from 3 to 4 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/dev-cmd-check.yml | 2 +- .github/workflows/pkgdown.yml | 2 +- .github/workflows/r-cmd-check.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/dev-cmd-check.yml b/.github/workflows/dev-cmd-check.yml index b61ffa4bb..77e4a431e 100644 --- a/.github/workflows/dev-cmd-check.yml +++ b/.github/workflows/dev-cmd-check.yml @@ -27,7 +27,7 @@ jobs: - {os: ubuntu-latest, r: 'release', dev-package: "mlr-org/bbotk', 'mlr-org/mlr3learners', 'mlr-org/paradox"} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml index 09f42a5a8..bd3abe4e4 100644 --- a/.github/workflows/pkgdown.yml +++ b/.github/workflows/pkgdown.yml @@ -23,7 +23,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 diff --git a/.github/workflows/r-cmd-check.yml b/.github/workflows/r-cmd-check.yml index fbd78ab3c..8d13d0e56 100644 --- a/.github/workflows/r-cmd-check.yml +++ b/.github/workflows/r-cmd-check.yml @@ -28,7 +28,7 @@ jobs: - {os: ubuntu-latest, r: 'release'} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: From 830bde982b8aa9deefc40d5feda18922fa571f2b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Mar 2024 15:17:08 +0000 Subject: [PATCH 38/46] Bump JamesIves/github-pages-deploy-action from 4.4.1 to 4.5.0 Bumps [JamesIves/github-pages-deploy-action](https://github.com/jamesives/github-pages-deploy-action) from 4.4.1 to 4.5.0. - [Release notes](https://github.com/jamesives/github-pages-deploy-action/releases) - [Commits](https://github.com/jamesives/github-pages-deploy-action/compare/v4.4.1...v4.5.0) --- updated-dependencies: - dependency-name: JamesIves/github-pages-deploy-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml index 09f42a5a8..63071248b 100644 --- a/.github/workflows/pkgdown.yml +++ b/.github/workflows/pkgdown.yml @@ -44,7 +44,7 @@ jobs: - name: Deploy if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.4.1 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: clean: false branch: gh-pages From 22ecbfc008235675a5dd6a6fd4eeb6b08eb134e0 Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 26 Mar 2024 17:17:12 +0100 Subject: [PATCH 39/46] fix --- tests/testthat/test_mlr_graphs_bagging.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_mlr_graphs_bagging.R b/tests/testthat/test_mlr_graphs_bagging.R index b7deb981a..15a70fb0c 100644 --- a/tests/testthat/test_mlr_graphs_bagging.R +++ b/tests/testthat/test_mlr_graphs_bagging.R @@ -50,7 +50,7 @@ test_that("Bagging with replacement", { tsk$filter(1:140) expect_equal(anyDuplicated(tsk$data()), 0) # make sure no duplicates - p = ppl("bagging", iterations = 2, + p = ppl("bagging", iterations = 2, frac = 1, graph = lrn("classif.debug", save_tasks = TRUE), replace = TRUE, averager = po("classifavg", collect_multiplicity = TRUE) ) From 11a22ce907b54f38bce6fb206de5ace362faaa1f Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 26 Mar 2024 17:26:14 +0100 Subject: [PATCH 40/46] Fix typo closes #743 --- R/ppl.R | 2 +- man/ppl.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ppl.R b/R/ppl.R index 113f80420..1754fb9de 100644 --- a/R/ppl.R +++ b/R/ppl.R @@ -3,7 +3,7 @@ #' @description #' Creates a [`Graph`] from [`mlr_graphs`] from given ID #' -#' `ppl()` taks a `charcter(1)` and returns a [`Graph`]. `ppls()` takes a `character` +#' `ppl()` taks a `character(1)` and returns a [`Graph`]. `ppls()` takes a `character` #' vector of any list and returns a `list` of possibly muliple [`Graph`]s. #' #' @param .key `[character(1)]`\cr diff --git a/man/ppl.Rd b/man/ppl.Rd index df4cefc78..713312f33 100644 --- a/man/ppl.Rd +++ b/man/ppl.Rd @@ -28,7 +28,7 @@ named \code{list} is returned, but unlike \code{\link[=pos]{pos()}} it will not \description{ Creates a \code{\link{Graph}} from \code{\link{mlr_graphs}} from given ID -\code{ppl()} taks a \code{charcter(1)} and returns a \code{\link{Graph}}. \code{ppls()} takes a \code{character} +\code{ppl()} taks a \code{character(1)} and returns a \code{\link{Graph}}. \code{ppls()} takes a \code{character} vector of any list and returns a \code{list} of possibly muliple \code{\link{Graph}}s. 
} \examples{ From 0804b4464c5f10a807c709c64094faf5374929b7 Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 26 Mar 2024 17:38:21 +0100 Subject: [PATCH 41/46] rename featureunion in ppls --- NEWS.md | 1 + R/pipeline_robustify.R | 2 +- R/pipeline_stacking.R | 2 +- tests/testthat/test_ppl.R | 16 ++++++++++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index dba77a500..aac4a0302 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # mlr3pipelines 0.5.0-9000 +* Changed the ID of `PipeOpFeatureUnion` used in `ppl("robustify")` and `ppl("stacking")` * Feature: The `$add_pipeop()` method got an argument `clone` (old behaviour `TRUE` by default) * Bugfix: `PipeOpFeatureUnion` in some rare cases dropped variables called `"x"` * Compatibility with upcoming paradox release diff --git a/R/pipeline_robustify.R b/R/pipeline_robustify.R index 1d9774627..abb386c4b 100644 --- a/R/pipeline_robustify.R +++ b/R/pipeline_robustify.R @@ -170,7 +170,7 @@ pipeline_robustify = function(task = NULL, learner = NULL, imputing, po("missind", affect_columns = selector_type(c("numeric", "integer", "logical")), type = if (missind_numeric) "numeric" else "factor") )), - if (has_numbers || has_logicals) po("featureunion"), + if (has_numbers || has_logicals) po("featureunion", id = "featureunion_robustify"), if (has_factorials) po("imputeoor") ) diff --git a/R/pipeline_stacking.R b/R/pipeline_stacking.R index cb1512126..4ac1f0a0d 100644 --- a/R/pipeline_stacking.R +++ b/R/pipeline_stacking.R @@ -53,7 +53,7 @@ pipeline_stacking = function(base_learners, super_learner, method = "cv", folds if (use_features) base_learners_cv = c(base_learners_cv, po("nop")) gunion(base_learners_cv, in_place = TRUE) %>>!% - po("featureunion") %>>!% + po("featureunion", id = "featureunion_stacking") %>>!% super_learner } diff --git a/tests/testthat/test_ppl.R b/tests/testthat/test_ppl.R index 948cbbf79..625fab0ce 100644 --- a/tests/testthat/test_ppl.R +++ b/tests/testthat/test_ppl.R @@ -57,3 +57,19 @@ test_that("mlr_pipeops multi-access works", { expect_equal(ppls(), mlr_graphs) }) + +test_that("mlr3book authors don't sleepwalk through life", { + + tasks = tsks(c("breast_cancer", "sonar")) + + + glrn_stack = as_learner(ppl("robustify") %>>% ppl("stacking", + lrns(c("classif.rpart", "classif.debug")), + lrn("classif.log_reg") + )) + glrn_stack$id = "Stack" + + learners = c(glrn_stack) + bmr = benchmark(benchmark_grid(tasks, learners, rsmp("cv", folds = 3))) + +}) From 0fefbbc64b962f8afd528de038ba9f455af1865a Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 26 Mar 2024 17:41:08 +0100 Subject: [PATCH 42/46] use different learners --- tests/testthat/test_ppl.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_ppl.R b/tests/testthat/test_ppl.R index 625fab0ce..9c88f868b 100644 --- a/tests/testthat/test_ppl.R +++ b/tests/testthat/test_ppl.R @@ -62,14 +62,13 @@ test_that("mlr3book authors don't sleepwalk through life", { tasks = tsks(c("breast_cancer", "sonar")) - glrn_stack = as_learner(ppl("robustify") %>>% ppl("stacking", lrns(c("classif.rpart", "classif.debug")), - lrn("classif.log_reg") + lrn("classif.rpart", id = "classif.rpart2") )) glrn_stack$id = "Stack" learners = c(glrn_stack) - bmr = benchmark(benchmark_grid(tasks, learners, rsmp("cv", folds = 3))) + bmr = benchmark(benchmark_grid(tasks, learners, rsmp("cv", folds = 2))) }) From 47e3ee5a0af9e544230208c7dcc7d1ae04ee6caa Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 26 Mar 2024 17:57:54 +0100 Subject: [PATCH 43/46] 
adapt tests
---
 tests/testthat/test_mlr_graphs_stacking.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test_mlr_graphs_stacking.R b/tests/testthat/test_mlr_graphs_stacking.R
index 79b5f04b0..8cd0d6735 100644
--- a/tests/testthat/test_mlr_graphs_stacking.R
+++ b/tests/testthat/test_mlr_graphs_stacking.R
@@ -10,7 +10,7 @@ test_that("Stacking Pipeline", {
   # default
   graph_stack = pipeline_stacking(base_learners, super_learner)
   expect_graph(graph_stack)
-  expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion", "super.rpart"))
+  expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion_stacking", "super.rpart"))
   graph_learner = as_learner(graph_stack)
   graph_learner$train(tsk("iris"))
   expect_class(graph_learner$model$super.rpart$model, "rpart")
@@ -19,7 +19,7 @@ test_that("Stacking Pipeline", {
   # no nop
   graph_stack = pipeline_stacking(base_learners, super_learner, use_features = FALSE)
   expect_graph(graph_stack)
-  expect_names(graph_stack$ids(), identical.to = c("base.rpart", "featureunion", "super.rpart"))
+  expect_names(graph_stack$ids(), identical.to = c("base.rpart", "featureunion_stacking", "super.rpart"))
   graph_learner = as_learner(graph_stack)
   graph_learner$train(tsk("iris"))
   expect_class(graph_learner$model$super.rpart$model, "rpart")
@@ -28,7 +28,7 @@ test_that("Stacking Pipeline", {
   # folds
   graph_stack = pipeline_stacking(base_learners, super_learner, folds = 5)
   expect_graph(graph_stack)
-  expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion", "super.rpart"))
+  expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion_stacking", "super.rpart"))
   graph_learner = as_learner(graph_stack)
   graph_learner$train(tsk("iris"))
   expect_equal(graph_learner$graph$pipeops$base.rpart$param_set$values$resampling.folds, 5)
@@ -38,7 +38,7 @@ test_that("Stacking Pipeline", {
   # insample
   graph_stack = pipeline_stacking(base_learners, super_learner, method = "insample")
   expect_graph(graph_stack)
-  expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion", "super.rpart"))
+  expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion_stacking", "super.rpart"))
   graph_learner = as_learner(graph_stack)
   graph_learner$train(tsk("iris"))
   expect_equal(graph_learner$graph$pipeops$base.rpart$param_set$values$resampling.method, "insample")

From 54a5b05eeb5627eb4426ffa89d941fc8819586ec Mon Sep 17 00:00:00 2001
From: mb706
Date: Tue, 26 Mar 2024 18:15:25 +0100
Subject: [PATCH 44/46] version bump

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 10fb81ecd..7f605cbf5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mlr3pipelines
 Title: Preprocessing Operators and Pipelines for 'mlr3'
-Version: 0.5.0-9000
+Version: 0.5.1
 Authors@R:
     c(person(given = "Martin",
              family = "Binder",

From 95b4c7289008c9bae25832cc6136ea65572d20da Mon Sep 17 00:00:00 2001
From: mb706
Date: Wed, 27 Mar 2024 16:02:59 +0100
Subject: [PATCH 45/46] NEWS

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index f5f7ce5b0..52df70c12 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# mlr3pipelines 0.5.0-9000
+# mlr3pipelines 0.5.1
 
 * Changed the ID of `PipeOpFeatureUnion` used in `ppl("robustify")` and `ppl("stacking")`.
 * `pipeline_bagging()` gets the `replace` argument (old behaviour `FALSE` by default).
From a4933e1b925d6d6c9c4a24794a441367040989e8 Mon Sep 17 00:00:00 2001
From: mb706
Date: Wed, 27 Mar 2024 16:04:17 +0100
Subject: [PATCH 46/46] version bump

---
 DESCRIPTION | 2 +-
 NEWS.md     | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 7f605cbf5..13937023b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mlr3pipelines
 Title: Preprocessing Operators and Pipelines for 'mlr3'
-Version: 0.5.1
+Version: 0.5.1-9000
 Authors@R:
     c(person(given = "Martin",
              family = "Binder",
diff --git a/NEWS.md b/NEWS.md
index 52df70c12..5de5bfe09 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,5 @@
+# mlr3pipelines 0.5.1-9000
+
 # mlr3pipelines 0.5.1
 
 * Changed the ID of `PipeOpFeatureUnion` used in `ppl("robustify")` and `ppl("stacking")`.