diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..5ace4600a --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/dev-cmd-check.yml b/.github/workflows/dev-cmd-check.yml new file mode 100644 index 000000000..77e4a431e --- /dev/null +++ b/.github/workflows/dev-cmd-check.yml @@ -0,0 +1,45 @@ +# dev cmd check workflow of the mlr3 ecosystem v0.1.0 +# https://github.com/mlr-org/actions +on: + workflow_dispatch: + push: + branches: + - master + pull_request: + branches: + - master + +name: dev-check + +jobs: + check-package: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.dev-package }} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'release', dev-package: "mlr-org/bbotk', 'mlr-org/mlr3learners', 'mlr-org/paradox"} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - name: Install dev versions + run: pak::pkg_install(c('${{ matrix.config.dev-package }}')) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml index 09f42a5a8..85cf2ff72 100644 --- a/.github/workflows/pkgdown.yml +++ b/.github/workflows/pkgdown.yml @@ -23,7 +23,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -44,7 +44,7 @@ jobs: - name: Deploy if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.4.1 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: clean: false branch: gh-pages diff --git 
a/.github/workflows/r-cmd-check.yml b/.github/workflows/r-cmd-check.yml index fbd78ab3c..8d13d0e56 100644 --- a/.github/workflows/r-cmd-check.yml +++ b/.github/workflows/r-cmd-check.yml @@ -28,7 +28,7 @@ jobs: - {os: ubuntu-latest, r: 'release'} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: diff --git a/DESCRIPTION b/DESCRIPTION index 28e01a1ff..eed8ae229 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mlr3pipelines Title: Preprocessing Operators and Pipelines for 'mlr3' -Version: 0.5.0-9000 +Version: 0.5.1-9000 Authors@R: c(person(given = "Martin", family = "Binder", diff --git a/NEWS.md b/NEWS.md index d766be9b3..1923c3e3a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,14 @@ -# mlr3pipelines 0.5.0-9000 +# mlr3pipelines 0.5.1-9000 * Added marshaling support to `GraphLearner` +# mlr3pipelines 0.5.1 + +* Changed the ID of `PipeOpFeatureUnion` used in `ppl("robustify")` and `ppl("stacking")`. +* `pipeline_bagging()` gets the `replace` argument (old behaviour `FALSE` by default). +* Feature: The `$add_pipeop()` method got an argument `clone` (old behaviour `TRUE` by default). +* Bugfix: `PipeOpFeatureUnion` in some rare cases dropped variables called `"x"`. +* Compatibility with upcoming paradox release. # mlr3pipelines 0.5.0-2 diff --git a/R/Graph.R b/R/Graph.R index 8cc95a0ae..7df319808 100644 --- a/R/Graph.R +++ b/R/Graph.R @@ -59,7 +59,7 @@ #' * `phash` :: `character(1)` \cr #' Stores a checksum calculated on the [`Graph`] configuration, which includes all [`PipeOp`] hashes #' *except* their `$param_set$values`, and a hash of `$edges`. -#' * `keep_results` :: `logical(1)` \cr +#' * `keep_results` :: `logical(1)`\cr #' Whether to store intermediate results in the [`PipeOp`]'s `$.result` slot, mostly for debugging purposes. Default `FALSE`. #' * `man` :: `character(1)`\cr #' Identifying string of the help page that shows with `help()`. 
@@ -69,13 +69,14 @@ #' (`logical(1)`) -> `character` \cr #' Get IDs of all [`PipeOp`]s. This is in order that [`PipeOp`]s were added if #' `sorted` is `FALSE`, and topologically sorted if `sorted` is `TRUE`. -#' * `add_pipeop(op)` \cr -#' ([`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr +#' * `add_pipeop(op, clone = TRUE)` \cr +#' ([`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`, `logical(1)`) -> `self` \cr #' Mutates [`Graph`] by adding a [`PipeOp`] to the [`Graph`]. This does not add any edges, so the new [`PipeOp`] #' will not be connected within the [`Graph`] at first.\cr #' Instead of supplying a [`PipeOp`] directly, an object that can naturally be converted to a [`PipeOp`] can also #' be supplied, e.g. a [`Learner`][mlr3::Learner] or a [`Filter`][mlr3filters::Filter]; see [`as_pipeop()`]. -#' The argument given as `op` is always cloned; to access a `Graph`'s [`PipeOp`]s by-reference, use `$pipeops`.\cr +#' The argument given as `op` is cloned if `clone` is `TRUE` (default); to access a `Graph`'s [`PipeOp`]s +#' by-reference, use `$pipeops`.\cr #' Note that `$add_pipeop()` is a relatively low-level operation, it is recommended to build graphs using [`%>>%`]. 
#' * `add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)` \cr #' (`character(1)`, `character(1)`, @@ -181,8 +182,8 @@ Graph = R6Class("Graph", topo_sort(tmp)$id }, - add_pipeop = function(op) { - op = as_pipeop(op, clone = TRUE) + add_pipeop = function(op, clone = TRUE) { + op = as_pipeop(op, clone = assert_flag(clone)) if (op$id %in% names(self$pipeops)) { stopf("PipeOp with id '%s' already in Graph", op$id) } diff --git a/R/LearnerAvg.R b/R/LearnerAvg.R index fd2be1883..6e9d53c45 100644 --- a/R/LearnerAvg.R +++ b/R/LearnerAvg.R @@ -58,12 +58,12 @@ LearnerClassifAvg = R6Class("LearnerClassifAvg", inherit = LearnerClassif, public = list( initialize = function(id = "classif.avg") { - ps = ParamSet$new(params = list( - ParamUty$new("measure", custom_check = check_class_or_character("MeasureClassif", mlr_measures), tags = "train"), - ParamUty$new("optimizer", custom_check = check_optimizer, tags = "train"), - ParamUty$new("log_level", tags = "train", + ps = ps( + measure = p_uty(custom_check = check_class_or_character("MeasureClassif", mlr_measures), tags = "train"), + optimizer = p_uty(custom_check = check_optimizer, tags = "train"), + log_level = p_uty(tags = "train", function(x) check_string(x) %check||% check_integerish(x)) - )) + ) ps$values = list(measure = "classif.ce", optimizer = "nloptr", log_level = "warn") super$initialize( id = id, @@ -132,12 +132,12 @@ LearnerClassifAvg = R6Class("LearnerClassifAvg", inherit = LearnerClassif, LearnerRegrAvg = R6Class("LearnerRegrAvg", inherit = LearnerRegr, public = list( initialize = function(id = "regr.avg") { - ps = ParamSet$new(params = list( - ParamUty$new("measure", custom_check = check_class_or_character("MeasureRegr", mlr_measures), tags = "train"), - ParamUty$new("optimizer", custom_check = check_optimizer, tags = "train"), - ParamUty$new("log_level", tags = "train", + ps = ps( + measure = p_uty(custom_check = check_class_or_character("MeasureRegr", mlr_measures), tags = "train"), + optimizer = 
p_uty(custom_check = check_optimizer, tags = "train"), + log_level = p_uty(tags = "train", function(x) check_string(x) %check||% check_integerish(x)) - )) + ) ps$values = list(measure = "regr.mse", optimizer = "nloptr", log_level = "warn") super$initialize( id = id, @@ -185,10 +185,9 @@ optimize_weights_learneravg = function(self, task, n_weights, data) { } pars = self$param_set$get_values(tags = "train") - ps = ParamSet$new(params = imap(data, function(x, n) { - if (is.numeric(n)) n = paste0("w.", n) - ParamDbl$new(id = n, lower = 0, upper = 1) - })) + pl = rep(list(p_dbl(0, 1)), length(data)) + names(pl) = names(data) %??% paste0("w.", seq_along(data)) + ps = do.call(ps, pl) optimizer = pars$optimizer if (inherits(optimizer, "character")) { optimizer = bbotk::opt(optimizer) @@ -198,7 +197,7 @@ optimize_weights_learneravg = function(self, task, n_weights, data) { } measure = pars$measure if (is.character(measure)) measure = msr(measure) - codomain = ParamSet$new(list(ParamDbl$new(id = measure$id, tags = ifelse(measure$minimize, "minimize", "maximize")))) + codomain = do.call(paradox::ps, structure(list(p_dbl(tags = ifelse(measure$minimize, "minimize", "maximize"))), names = measure$id)) objfun = bbotk::ObjectiveRFun$new( fun = function(xs) learneravg_objfun(xs, task = task, measure = measure, avg_weight_fun = self$weighted_average_prediction, data = data), domain = ps, codomain = codomain diff --git a/R/PipeOp.R b/R/PipeOp.R index 24a12ab6c..9c8821f57 100644 --- a/R/PipeOp.R +++ b/R/PipeOp.R @@ -38,7 +38,7 @@ #' #' @section Construction: #' ``` -#' PipeOp$new(id, param_set = ParamSet$new(), param_vals = list(), input, output, packages = character(0), tags = character(0)) +#' PipeOp$new(id, param_set = ps(), param_vals = list(), input, output, packages = character(0), tags = character(0)) #' ``` #' #' * `id` :: `character(1)`\cr @@ -236,7 +236,7 @@ PipeOp = R6Class("PipeOp", .result = NULL, tags = NULL, - initialize = function(id, param_set = ParamSet$new(), 
param_vals = list(), input, output, packages = character(0), tags = "abstract") { + initialize = function(id, param_set = ps(), param_vals = list(), input, output, packages = character(0), tags = "abstract") { if (inherits(param_set, "ParamSet")) { private$.param_set = assert_param_set(param_set) private$.param_set_source = NULL @@ -338,7 +338,7 @@ PipeOp = R6Class("PipeOp", id = function(val) { if (!missing(val)) { private$.id = val - if (!is.null(private$.param_set)) { + if (paradox_info$is_old && !is.null(private$.param_set)) { # private$.param_set may be NULL if it is constructed dynamically by active binding private$.param_set$set_id = val } @@ -353,7 +353,7 @@ PipeOp = R6Class("PipeOp", } else { private$.param_set = sourcelist[[1]] } - if (!is.null(self$id)) { + if (paradox_info$is_old && !is.null(self$id)) { private$.param_set$set_id = self$id } } diff --git a/R/PipeOpBoxCox.R b/R/PipeOpBoxCox.R index 19034d9dc..43924c7f2 100644 --- a/R/PipeOpBoxCox.R +++ b/R/PipeOpBoxCox.R @@ -65,12 +65,12 @@ PipeOpBoxCox = R6Class("PipeOpBoxCox", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "boxcox", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamLgl$new("standardize", default = TRUE, tags = c("train", "boxcox")), - ParamDbl$new("eps", default = 0.001, lower = 0, tags = c("train", "boxcox")), - ParamDbl$new("lower", tags = c("train", "boxcox")), - ParamDbl$new("upper", tags = c("train", "boxcox")) - )) + ps = ps( + standardize = p_lgl(default = TRUE, tags = c("train", "boxcox")), + eps = p_dbl(default = 0.001, lower = 0, tags = c("train", "boxcox")), + lower = p_dbl(tags = c("train", "boxcox")), + upper = p_dbl(tags = c("train", "boxcox")) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "bestNormalize", feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpBranch.R b/R/PipeOpBranch.R index 1952125f0..25f48efef 100644 --- a/R/PipeOpBranch.R +++ b/R/PipeOpBranch.R @@ -19,9 +19,9 @@ #' * 
`options` :: `numeric(1)` | `character`\cr #' If `options` is an integer number, it determines the number of #' output channels / options that are created, named `output1`...`output`. The -#' `$selection` parameter will then be a [`ParamInt`]. +#' `$selection` parameter will then be an integer. #' If `options` is a `character`, it determines the names of channels directly. -#' The `$selection` parameter will then be a [`ParamFct`]. +#' The `$selection` parameter will then be categorical. #' * `id` :: `character(1)`\cr #' Identifier of resulting object, default `"branch"`. #' * `param_vals` :: named `list`\cr #' @@ -90,14 +90,14 @@ PipeOpBranch = R6Class("PipeOpBranch", ) if (is.numeric(options)) { options = round(options) - param = ParamInt$new("selection", lower = 1L, upper = options, tags = c("train", "predict", "required")) + param = p_int(lower = 1L, upper = options, tags = c("train", "predict", "required")) options = rep_suffix("output", options) initval = 1 } else { - param = ParamFct$new("selection", levels = options, tags = c("train", "predict", "required")) + param = p_fct(options, tags = c("train", "predict", "required")) initval = options[1] } - ps = ParamSet$new(params = list(param)) + ps = ps(selection = param) ps$values$selection = initval super$initialize(id, ps, param_vals, input = data.table(name = "input", train = "*", predict = "*"), diff --git a/R/PipeOpChunk.R b/R/PipeOpChunk.R index 57b69ad7a..03aacce79 100644 --- a/R/PipeOpChunk.R +++ b/R/PipeOpChunk.R @@ -64,9 +64,9 @@ PipeOpChunk = R6Class("PipeOpChunk", public = list( initialize = function(outnum, id = "chunk", param_vals = list()) { outnum = assert_int(outnum, lower = 1L) - ps = ParamSet$new(params = list( - ParamLgl$new("shuffle", tags = "train") - )) + ps = ps( + shuffle = p_lgl(tags = "train") + ) ps$values = list(shuffle = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, diff --git a/R/PipeOpClassBalancing.R b/R/PipeOpClassBalancing.R index 507512f6f..40be0b558 100644
--- a/R/PipeOpClassBalancing.R +++ b/R/PipeOpClassBalancing.R @@ -104,14 +104,12 @@ PipeOpClassBalancing = R6Class("PipeOpClassBalancing", public = list( initialize = function(id = "classbalancing", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamDbl$new("ratio", lower = 0, upper = Inf, tags = "train"), - ParamFct$new("reference", - levels = c("all", "major", "minor", "nonmajor", "nonminor", "one"), tags = "train"), - ParamFct$new("adjust", - levels = c("all", "major", "minor", "nonmajor", "nonminor", "upsample", "downsample"), tags = "train"), - ParamLgl$new("shuffle", tags = "train") - )) + ps = ps( + ratio = p_dbl(lower = 0, upper = Inf, tags = "train"), + reference = p_fct(c("all", "major", "minor", "nonmajor", "nonminor", "one"), tags = "train"), + adjust = p_fct(c("all", "major", "minor", "nonmajor", "nonminor", "upsample", "downsample"), tags = "train"), + shuffle = p_lgl(tags = "train") + ) ps$values = list(ratio = 1, reference = "all", adjust = "all", shuffle = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data") } diff --git a/R/PipeOpClassWeights.R b/R/PipeOpClassWeights.R index 802af61da..9764b3535 100644 --- a/R/PipeOpClassWeights.R +++ b/R/PipeOpClassWeights.R @@ -71,9 +71,9 @@ PipeOpClassWeights = R6Class("PipeOpClassWeights", public = list( initialize = function(id = "classweights", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamDbl$new("minor_weight", lower = 0, upper = Inf, tags = "train") - )) + ps = ps( + minor_weight = p_dbl(lower = 0, upper = Inf, tags = "train") + ) ps$values = list(minor_weight = 1) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data") } diff --git a/R/PipeOpColApply.R b/R/PipeOpColApply.R index f4366149d..da1f78f94 100644 --- a/R/PipeOpColApply.R +++ b/R/PipeOpColApply.R @@ -92,9 +92,9 @@ PipeOpColApply = 
R6Class("PipeOpColApply", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "colapply", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamUty$new("applicator", custom_check = check_function, tags = c("train", "predict")) - )) + ps = ps( + applicator = p_uty(custom_check = check_function, tags = c("train", "predict")) + ) ps$values = list(applicator = identity) super$initialize(id, ps, param_vals = param_vals) } diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R index 39173a54e..606572854 100644 --- a/R/PipeOpColRoles.R +++ b/R/PipeOpColRoles.R @@ -56,9 +56,9 @@ PipeOpColRoles = R6Class("PipeOpColRoles", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "colroles", param_vals = list()) { - ps = ParamSet$new(params = list( + ps = ps( # named list, each entry with a vector of roles - ParamUty$new("new_role", tags = c("train", "predict"), custom_check = function(x) { + new_role = p_uty(tags = c("train", "predict"), custom_check = function(x) { first_check = check_list(x, types = "character", any.missing = FALSE, min.len = 1L, names = "named") # return the error directly if this failed if (is.character(first_check)) { @@ -69,7 +69,7 @@ PipeOpColRoles = R6Class("PipeOpColRoles", all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles)) check_subset(unlist(x), all_col_roles[all_col_roles != "target"]) }) - )) + ) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE) } ), diff --git a/R/PipeOpCollapseFactors.R b/R/PipeOpCollapseFactors.R index c0b94bee7..7d9532e8e 100644 --- a/R/PipeOpCollapseFactors.R +++ b/R/PipeOpCollapseFactors.R @@ -59,10 +59,10 @@ PipeOpCollapseFactors = R6Class("PipeOpCollapseFactors", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "collapsefactors", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamDbl$new("no_collapse_above_prevalence", 0, 1, tags = c("train", "predict")), - 
ParamInt$new("target_level_count", 2, tags = c("train", "predict")) - )) + ps = ps( + no_collapse_above_prevalence = p_dbl(0, 1, tags = c("train", "predict")), + target_level_count = p_int(2, tags = c("train", "predict")) + ) ps$values = list(no_collapse_above_prevalence = 1, target_level_count = 2) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("factor", "ordered")) } diff --git a/R/PipeOpDateFeatures.R b/R/PipeOpDateFeatures.R index b49e8e5f1..d9908265f 100644 --- a/R/PipeOpDateFeatures.R +++ b/R/PipeOpDateFeatures.R @@ -98,20 +98,20 @@ PipeOpDateFeatures = R6Class("PipeOpDateFeatures", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "datefeatures", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamLgl$new("keep_date_var", tags = c("train", "predict", "required")), - ParamLgl$new("cyclic", tags = c("train", "predict", "required")), - ParamLgl$new("year", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("month", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("week_of_year", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("day_of_year", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("day_of_month", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("day_of_week", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("hour", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("minute", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("second", tags = c("train", "predict", "datepart", "required")), - ParamLgl$new("is_day", tags = c("train", "predict", "datepart", "required")) - )) + ps = ps( + keep_date_var = p_lgl(tags = c("train", "predict", "required")), + cyclic = p_lgl(tags = c("train", "predict", "required")), + year = p_lgl(tags = c("train", "predict", "datepart", "required")), + month = p_lgl(tags = c("train", "predict", 
"datepart", "required")), + week_of_year = p_lgl(tags = c("train", "predict", "datepart", "required")), + day_of_year = p_lgl(tags = c("train", "predict", "datepart", "required")), + day_of_month = p_lgl(tags = c("train", "predict", "datepart", "required")), + day_of_week = p_lgl(tags = c("train", "predict", "datepart", "required")), + hour = p_lgl(tags = c("train", "predict", "datepart", "required")), + minute = p_lgl(tags = c("train", "predict", "datepart", "required")), + second = p_lgl(tags = c("train", "predict", "datepart", "required")), + is_day = p_lgl(tags = c("train", "predict", "datepart", "required")) + ) ps$values = list(keep_date_var = FALSE, cyclic = FALSE, year = TRUE, month = TRUE, week_of_year = TRUE, day_of_year = TRUE, day_of_month = TRUE, day_of_week = TRUE, hour = TRUE, minute = TRUE, second = TRUE, is_day = TRUE) diff --git a/R/PipeOpEncode.R b/R/PipeOpEncode.R index 8addb74f9..35e630525 100644 --- a/R/PipeOpEncode.R +++ b/R/PipeOpEncode.R @@ -82,9 +82,9 @@ PipeOpEncode = R6Class("PipeOpEncode", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "encode", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamFct$new("method", levels = c("one-hot", "treatment", "helmert", "poly", "sum"), tags = c("train", "predict")) - )) + ps = ps( + method = p_fct(levels = c("one-hot", "treatment", "helmert", "poly", "sum"), tags = c("train", "predict")) + ) ps$values = list(method = "one-hot") super$initialize(id, param_set = ps, param_vals = param_vals, packages = "stats", tags = "encode", feature_types = c("factor", "ordered")) } diff --git a/R/PipeOpEncodeImpact.R b/R/PipeOpEncodeImpact.R index 444f5911c..5de079907 100644 --- a/R/PipeOpEncodeImpact.R +++ b/R/PipeOpEncodeImpact.R @@ -77,10 +77,10 @@ PipeOpEncodeImpact = R6Class("PipeOpEncodeImpact", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "encodeimpact", param_vals = list()) { - ps = ParamSet$new(params = list( - 
ParamDbl$new("smoothing", 0, Inf, tags = c("train", "required")), - ParamLgl$new("impute_zero", tags = c("train", "required")) - )) + ps = ps( + smoothing = p_dbl(0, Inf, tags = c("train", "required")), + impute_zero = p_lgl(tags = c("train", "required")) + ) ps$values = list(smoothing = 1e-4, impute_zero = FALSE) super$initialize(id, param_set = ps, param_vals = param_vals, tags = "encode", feature_types = c("factor", "ordered")) } diff --git a/R/PipeOpEncodeLmer.R b/R/PipeOpEncodeLmer.R index 5705cac2c..abdb031b0 100644 --- a/R/PipeOpEncodeLmer.R +++ b/R/PipeOpEncodeLmer.R @@ -88,9 +88,9 @@ PipeOpEncodeLmer = R6Class("PipeOpEncodeLmer", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "encodelmer", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamLgl$new("fast_optim", tags = c("train", "required")) - )) + ps = ps( + fast_optim = p_lgl(tags = c("train", "required")) + ) ps$values = list(fast_optim = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, packages = c("lme4", "nloptr"), tags = "encode", feature_types = c("factor", "ordered")) } diff --git a/R/PipeOpEnsemble.R b/R/PipeOpEnsemble.R index 255237a41..df3bcab98 100644 --- a/R/PipeOpEnsemble.R +++ b/R/PipeOpEnsemble.R @@ -10,7 +10,7 @@ #' @section Construction: #' Note: This object is typically constructed via a derived class, e.g. [`PipeOpClassifAvg`] or [`PipeOpRegrAvg`]. 
#' ``` -#' PipeOpEnsemble$new(innum = 0, collect_multiplicity = FALSE, id, param_set = ParamSet$new(), param_vals = list(), packages = character(0), prediction_type = "Prediction") +#' PipeOpEnsemble$new(innum = 0, collect_multiplicity = FALSE, id, param_set = ps(), param_vals = list(), packages = character(0), prediction_type = "Prediction") #' ``` #' #' * `innum` :: `numeric(1)`\cr @@ -82,9 +82,14 @@ PipeOpEnsemble = R6Class("PipeOpEnsemble", inherit = PipeOp, public = list( - initialize = function(innum = 0, collect_multiplicity = FALSE, id, param_set = ParamSet$new(), param_vals = list(), packages = character(0), prediction_type = "Prediction", tags = NULL) { + initialize = function(innum = 0, collect_multiplicity = FALSE, id, param_set = ps(), param_vals = list(), packages = character(0), prediction_type = "Prediction", tags = NULL) { assert_integerish(innum, lower = 0) - param_set$add(ParamUty$new("weights", custom_check = check_weights(innum), tags = "predict")) + if (paradox_info$is_old) { + paux = ps(weights = p_uty(check_weights(innum), tags = "predict")) + param_set$add(paux$params$weights) + } else { + param_set = c(param_set, ps(weights = p_uty(check_weights(innum), tags = "predict"))) + } param_set$values$weights = 1 inname = if (innum) rep_suffix("input", innum) else "..." 
intype = c("NULL", prediction_type) diff --git a/R/PipeOpFeatureUnion.R b/R/PipeOpFeatureUnion.R index 8a2c1b4ce..bbc1e890b 100644 --- a/R/PipeOpFeatureUnion.R +++ b/R/PipeOpFeatureUnion.R @@ -202,6 +202,7 @@ cbind_tasks = function(inputs, assert_targets_equal, inprefix) { # again done by reference new_features = unlist(c(list(data.table(x = vector(length = task$nrow))), map(tail(inputs, -1L), .f = function(y) y$data(ids, cols = y$feature_names))), recursive = FALSE) + names(new_features)[1] = make.unique(rev(names(new_features)))[[length(new_features)]] # we explicitly have to subset to the unique column names, otherwise task$cbind() complains for data.table backends new_features = new_features[unique(names(new_features))] diff --git a/R/PipeOpFilter.R b/R/PipeOpFilter.R index 06d4167a6..d8657dce4 100644 --- a/R/PipeOpFilter.R +++ b/R/PipeOpFilter.R @@ -114,16 +114,24 @@ PipeOpFilter = R6Class("PipeOpFilter", initialize = function(filter, id = filter$id, param_vals = list()) { assert_class(filter, "Filter") self$filter = filter$clone(deep = TRUE) - self$filter$param_set$set_id = "" - map(self$filter$param_set$params, function(p) p$tags = union(p$tags, "train")) - private$.outer_param_set = ParamSet$new(list( - ParamInt$new("nfeat", lower = 0, tags = "train"), - ParamDbl$new("frac", lower = 0, upper = 1, tags = "train"), - ParamDbl$new("cutoff", tags = "train"), - ParamInt$new("permuted", lower = 1, tags = "train") - )) - private$.outer_param_set$set_id = "filter" - super$initialize(id, alist(private$.outer_param_set, self$filter$param_set), param_vals = param_vals, tags = "feature selection") + if (paradox_info$is_old) { + self$filter$param_set$set_id = "" + map(self$filter$param_set$params, function(p) p$tags = union(p$tags, "train")) + } else { + for (pn in self$filter$param_set$ids()) { + self$filter$param_set$tags[[pn]] = union(self$filter$param_set$tags[[pn]] , "train") + } + } + private$.outer_param_set = ps( + nfeat = p_int(lower = 0, tags = "train"), + 
frac = p_dbl(lower = 0, upper = 1, tags = "train"), + cutoff = p_dbl(tags = "train"), + permuted = p_int(lower = 1, tags = "train") + ) + if (paradox_info$is_old) { + private$.outer_param_set$set_id = "filter" + } + super$initialize(id, alist(filter = private$.outer_param_set, self$filter$param_set), param_vals = param_vals, tags = "feature selection") } ), private = list( @@ -187,4 +195,4 @@ PipeOpFilter = R6Class("PipeOpFilter", ) ) -mlr_pipeops$add("filter", PipeOpFilter, list(R6Class("Filter", public = list(id = "dummyfilter", param_set = ParamSet$new()))$new())) +mlr_pipeops$add("filter", PipeOpFilter, list(R6Class("Filter", public = list(id = "dummyfilter", param_set = ps()))$new())) diff --git a/R/PipeOpFixFactors.R b/R/PipeOpFixFactors.R index 937dea1d3..57f3dfeff 100644 --- a/R/PipeOpFixFactors.R +++ b/R/PipeOpFixFactors.R @@ -51,9 +51,9 @@ PipeOpFixFactors = R6Class("PipeOpFixFactors", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "fixfactors", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamLgl$new("droplevels", tags = c("train", "predict")) - )) + ps = ps( + droplevels = p_lgl(tags = c("train", "predict")) + ) ps$values = list(droplevels = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, tags = "robustify", feature_types = c("factor", "ordered")) } diff --git a/R/PipeOpHistBin.R b/R/PipeOpHistBin.R index f4924d0fd..921145826 100644 --- a/R/PipeOpHistBin.R +++ b/R/PipeOpHistBin.R @@ -64,9 +64,9 @@ PipeOpHistBin = R6Class("PipeOpHistBin", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "histbin", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamUty$new("breaks", default = "Sturges", tags = c("train", "hist")) - )) + ps = ps( + breaks = p_uty(default = "Sturges", tags = c("train", "hist")) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "graphics", feature_types = c("numeric", "integer")) } ), diff --git 
a/R/PipeOpICA.R b/R/PipeOpICA.R index ff56a54fd..9f363ed3e 100644 --- a/R/PipeOpICA.R +++ b/R/PipeOpICA.R @@ -90,19 +90,19 @@ PipeOpICA = R6Class("PipeOpICA", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "ica", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamInt$new("n.comp", lower = 1, upper = Inf, tags = c("train", "ica")), - ParamFct$new("alg.typ", levels = c("parallel", "deflation"), + ps = ps( + n.comp = p_int(lower = 1, upper = Inf, tags = c("train", "ica")), + alg.typ = p_fct(levels = c("parallel", "deflation"), default = "parallel", tags = c("train", "ica")), - ParamFct$new("fun", default = "logcosh", levels = c("logcosh", "exp"), tags = c("train", "ica")), - ParamDbl$new("alpha", default = 1.0, lower = 1, upper = 2, tags = c("train", "ica")), - ParamFct$new("method", default = "R", levels = c("C", "R"), tags = c("train", "ica")), - ParamLgl$new("row.norm", default = FALSE, tags = c("train", "ica")), - ParamInt$new("maxit", default = 200, lower = 1, tags = c("train", "ica")), - ParamDbl$new("tol", default = 1e-04, lower = 0, tags = c("train", "ica")), - ParamLgl$new("verbose", default = FALSE, tags = c("train", "ica")), - ParamUty$new("w.init", default = NULL, tags = c("train", "ica")) - )) + fun = p_fct(default = "logcosh", levels = c("logcosh", "exp"), tags = c("train", "ica")), + alpha = p_dbl(default = 1.0, lower = 1, upper = 2, tags = c("train", "ica")), + method = p_fct(default = "R", levels = c("C", "R"), tags = c("train", "ica")), + row.norm = p_lgl(default = FALSE, tags = c("train", "ica")), + maxit = p_int(default = 200, lower = 1, tags = c("train", "ica")), + tol = p_dbl(default = 1e-04, lower = 0, tags = c("train", "ica")), + verbose = p_lgl(default = FALSE, tags = c("train", "ica")), + w.init = p_uty(default = NULL, tags = c("train", "ica")) + ) ps$values = list(method = "C") super$initialize(id, param_set = ps, param_vals = param_vals, packages = "fastICA", feature_types = c("numeric", "integer")) 
diff --git a/R/PipeOpImpute.R b/R/PipeOpImpute.R index 3e8f52906..ea21018f2 100644 --- a/R/PipeOpImpute.R +++ b/R/PipeOpImpute.R @@ -8,7 +8,7 @@ #' #' @section Construction: #' ``` -#' PipeOpImpute$$new(id, param_set = ParamSet$new(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task") +#' PipeOpImpute$new(id, param_set = ps(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task") #' ``` #' #' * `id` :: `character(1)`\cr @@ -110,18 +110,22 @@ PipeOpImpute = R6Class("PipeOpImpute", inherit = PipeOp, public = list( - initialize = function(id, param_set = ParamSet$new(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task", feature_types = mlr_reflections$task_feature_types) { + initialize = function(id, param_set = ps(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task", feature_types = mlr_reflections$task_feature_types) { # add one or two parameters: affect_columns (always) and context_columns (if whole_task_dependent is TRUE) - addparams = list(ParamUty$new("affect_columns", custom_check = check_function_or_null, tags = "train")) + addparams = list(affect_columns = p_uty(custom_check = check_function_or_null, tags = "train")) if (whole_task_dependent) { - addparams = c(addparams, list(ParamUty$new("context_columns", custom_check = check_function_or_null, tags = "train"))) + addparams = c(addparams, list(context_columns = p_uty(custom_check = check_function_or_null, tags = "train"))) } - + affectcols_ps = do.call(ps, addparams) # ParamSetCollection handles adding of new parameters differently if (inherits(param_set, "ParamSet")) { + if (paradox_info$is_old) { + lapply(affectcols_ps$params, param_set$add) + } else { + param_set = c(param_set, affectcols_ps) + } } else { - private$.affectcols_ps = ParamSet$new(addparams) + private$.affectcols_ps = affectcols_ps
param_set = c(param_set, alist(private$.affectcols_ps)) } private$.feature_types = assert_subset(feature_types, mlr_reflections$task_feature_types) diff --git a/R/PipeOpImputeConstant.R b/R/PipeOpImputeConstant.R index 22aaa8431..4554a28bd 100644 --- a/R/PipeOpImputeConstant.R +++ b/R/PipeOpImputeConstant.R @@ -69,10 +69,10 @@ PipeOpImputeConstant = R6Class("PipeOpImputeConstant", inherit = PipeOpImpute, public = list( initialize = function(id = "imputeconstant", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamUty$new("constant", tags = c("train", "required"), custom_check = check_scalar), - ParamLgl$new("check_levels", tags = c("train", "required")) - )) + ps = ps( + constant = p_uty(tags = c("train", "required"), custom_check = check_scalar), + check_levels = p_lgl(tags = c("train", "required")) + ) ps$values = list(constant = ".MISSING", check_levels = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered", "POSIXct")) } diff --git a/R/PipeOpImputeLearner.R b/R/PipeOpImputeLearner.R index 611dafb5c..3ff7662f1 100644 --- a/R/PipeOpImputeLearner.R +++ b/R/PipeOpImputeLearner.R @@ -101,7 +101,9 @@ PipeOpImputeLearner = R6Class("PipeOpImputeLearner", public = list( initialize = function(learner, id = "imputelearner", param_vals = list()) { private$.learner = as_learner(learner, clone = TRUE) - private$.learner$param_set$set_id = "" + if (paradox_info$is_old) { + private$.learner$param_set$set_id = "" + } id = id %??% private$.learner$id feature_types = switch(private$.learner$task_type, regr = c("integer", "numeric"), @@ -196,7 +198,7 @@ PipeOpImputeLearner = R6Class("PipeOpImputeLearner", ) ) -mlr_pipeops$add("imputelearner", PipeOpImputeLearner, list(R6Class("Learner", public = list(id = "learner", task_type = "classif", param_set = ParamSet$new()))$new())) +mlr_pipeops$add("imputelearner", PipeOpImputeLearner, list(R6Class("Learner", public = list(id 
= "learner", task_type = "classif", param_set = ps()))$new())) # See mlr-org/mlr#470 convert_to_task = function(id = "imputing", data, target, task_type, ...) { diff --git a/R/PipeOpImputeOOR.R b/R/PipeOpImputeOOR.R index 4236e91c2..456777ce9 100644 --- a/R/PipeOpImputeOOR.R +++ b/R/PipeOpImputeOOR.R @@ -80,10 +80,10 @@ PipeOpImputeOOR = R6Class("PipeOpImputeOOR", inherit = PipeOpImpute, public = list( initialize = function(id = "imputeoor", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamLgl$new("min", tags = c("train", "predict")), - ParamDbl$new("offset", lower = 0, tags = c("train", "predict")), - ParamDbl$new("multiplier", lower = 0, tags = c("train", "predict"))) + ps = ps( + min = p_lgl(tags = c("train", "predict")), + offset = p_dbl(lower = 0, tags = c("train", "predict")), + multiplier = p_dbl(lower = 0, tags = c("train", "predict")) ) ps$values = list(min = TRUE, offset = 1, multiplier = 1) # this is one of the few imputers that handles 'character' features! diff --git a/R/PipeOpKernelPCA.R b/R/PipeOpKernelPCA.R index 6b666ed32..5c24b21d4 100644 --- a/R/PipeOpKernelPCA.R +++ b/R/PipeOpKernelPCA.R @@ -67,14 +67,14 @@ PipeOpKernelPCA = R6Class("PipeOpKernelPCA", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "kernelpca", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamFct$new("kernel", default = "rbfdot", levels = c("rbfdot", "polydot", + ps = ps( + kernel = p_fct(default = "rbfdot", levels = c("rbfdot", "polydot", "vanilladot", "tanhdot", "laplacedot", "besseldot", "anovadot", "splinedot"), tags = c("train", "kpca")), - ParamUty$new("kpar", tags = c("train", "kpca")), - ParamInt$new("features", default = 0, lower = 0, tags = c("train", "kpca")), - ParamDbl$new("th", default = 1e-04, lower = 0, tags = c("train", "kpca")), - ParamUty$new("na.action", default = stats::na.omit, tags = c("train", "kpca")) - )) + kpar = p_uty(tags = c("train", "kpca")), + features = p_int(default = 0, lower = 0, tags = 
c("train", "kpca")), + th = p_dbl(default = 1e-04, lower = 0, tags = c("train", "kpca")), + na.action = p_uty(default = stats::na.omit, tags = c("train", "kpca")) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "kernlab", feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpLearner.R b/R/PipeOpLearner.R index 0bfdc1445..b35949e78 100644 --- a/R/PipeOpLearner.R +++ b/R/PipeOpLearner.R @@ -99,7 +99,9 @@ PipeOpLearner = R6Class("PipeOpLearner", inherit = PipeOp, id = function(val) { if (!missing(val)) { private$.id = val - private$.learner$param_set$set_id = val + if (paradox_info$is_old) { + private$.learner$param_set$set_id = val + } } private$.id }, @@ -152,4 +154,4 @@ PipeOpLearner = R6Class("PipeOpLearner", inherit = PipeOp, ) ) -mlr_pipeops$add("learner", PipeOpLearner, list(R6Class("Learner", public = list(id = "learner", task_type = "classif", param_set = ParamSet$new(), packages = "mlr3pipelines"))$new())) +mlr_pipeops$add("learner", PipeOpLearner, list(R6Class("Learner", public = list(id = "learner", task_type = "classif", param_set = ps(), packages = "mlr3pipelines"))$new())) diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index b011f89b5..994e1045a 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -116,19 +116,23 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", public = list( initialize = function(learner, id = NULL, param_vals = list()) { private$.learner = as_learner(learner, clone = TRUE) - private$.learner$param_set$set_id = "" + if (paradox_info$is_old) { + private$.learner$param_set$set_id = "" + } id = id %??% private$.learner$id # FIXME: can be changed when mlr-org/mlr3#470 has an answer type = private$.learner$task_type task_type = mlr_reflections$task_types[type, mult = "first"]$task - private$.crossval_param_set = ParamSet$new(params = list( - ParamFct$new("method", levels = c("cv", "insample"), tags = c("train", "required")), - ParamInt$new("folds", lower = 2L, upper = Inf, tags = 
c("train", "required")), - ParamLgl$new("keep_response", tags = c("train", "required")) - )) + private$.crossval_param_set = ps( + method = p_fct(levels = c("cv", "insample"), tags = c("train", "required")), + folds = p_int(lower = 2L, upper = Inf, tags = c("train", "required")), + keep_response = p_lgl(tags = c("train", "required")) + ) private$.crossval_param_set$values = list(method = "cv", folds = 3, keep_response = FALSE) - private$.crossval_param_set$set_id = "resampling" + if (paradox_info$is_old) { + private$.crossval_param_set$set_id = "resampling" + } # Dependencies in paradox have been broken from the start and this is known since at least a year: # https://github.com/mlr-org/paradox/issues/216 # The following would make it _impossible_ to set "method" to "insample", because then "folds" @@ -137,7 +141,7 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", # in PipeOp ParamSets. # private$.crossval_param_set$add_dep("folds", "method", CondEqual$new("cv")) # don't do this. - super$initialize(id, alist(private$.crossval_param_set, private$.learner$param_set), param_vals = param_vals, can_subset_cols = TRUE, task_type = task_type, tags = c("learner", "ensemble")) + super$initialize(id, alist(resampling = private$.crossval_param_set, private$.learner$param_set), param_vals = param_vals, can_subset_cols = TRUE, task_type = task_type, tags = c("learner", "ensemble")) } ), @@ -218,4 +222,4 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", ) ) -mlr_pipeops$add("learner_cv", PipeOpLearnerCV, list(R6Class("Learner", public = list(id = "learner_cv", task_type = "classif", param_set = ParamSet$new()))$new())) +mlr_pipeops$add("learner_cv", PipeOpLearnerCV, list(R6Class("Learner", public = list(id = "learner_cv", task_type = "classif", param_set = ps()))$new())) diff --git a/R/PipeOpMissingIndicators.R b/R/PipeOpMissingIndicators.R index 06407b8ca..7e5d819dd 100644 --- a/R/PipeOpMissingIndicators.R +++ b/R/PipeOpMissingIndicators.R @@ -79,10 +79,10 @@ PipeOpMissInd = 
R6Class("PipeOpMissInd", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "missind", param_vals = list()) { - ps = ParamSet$new(list( - ParamFct$new("which", levels = c("missing_train", "all"), tags = c("train", "required")), - ParamFct$new("type", levels = c("factor", "integer", "logical", "numeric"), tags = c("train", "predict", "required")) - )) + ps = ps( + which = p_fct(levels = c("missing_train", "all"), tags = c("train", "required")), + type = p_fct(levels = c("factor", "integer", "logical", "numeric"), tags = c("train", "predict", "required")) + ) ps$values = list(which = "missing_train", type = "factor") super$initialize(id, ps, param_vals = param_vals, tags = "missings") if ("affect_columns" %nin% names(param_vals)) { diff --git a/R/PipeOpModelMatrix.R b/R/PipeOpModelMatrix.R index 225554783..a9376c04e 100644 --- a/R/PipeOpModelMatrix.R +++ b/R/PipeOpModelMatrix.R @@ -59,9 +59,9 @@ PipeOpModelMatrix = R6Class("PipeOpModelMatrix", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "modelmatrix", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamUty$new("formula", tags = c("train", "predict"), custom_check = check_formula) - )) + ps = ps( + formula = p_uty(tags = c("train", "predict"), custom_check = check_formula) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "stats") } ), diff --git a/R/PipeOpMultiplicity.R b/R/PipeOpMultiplicity.R index 78ba96053..649943dda 100644 --- a/R/PipeOpMultiplicity.R +++ b/R/PipeOpMultiplicity.R @@ -260,9 +260,9 @@ PipeOpReplicate = R6Class("PipeOpReplicate", inherit = PipeOp, public = list( initialize = function(id = "replicate", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamInt$new("reps", lower = 1, tags = c("train", "predict", "required")) - )) + ps = ps( + reps = p_int(lower = 1, tags = c("train", "predict", "required")) + ) ps$values = list(reps = 1) super$initialize(id, param_set = ps, param_vals = 
param_vals, input = data.table(name = "input", train = "*", predict = "*"), diff --git a/R/PipeOpMutate.R b/R/PipeOpMutate.R index 2121bca7c..26cbe145d 100644 --- a/R/PipeOpMutate.R +++ b/R/PipeOpMutate.R @@ -73,10 +73,10 @@ PipeOpMutate = R6Class("PipeOpMutate", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "mutate", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamUty$new("mutation", custom_check = check_mutation_formulae, tags = c("train", "predict", "required")), - ParamLgl$new("delete_originals", tags = c("train", "predict", "required")) - )) + ps = ps( + mutation = p_uty(custom_check = check_mutation_formulae, tags = c("train", "predict", "required")), + delete_originals = p_lgl(tags = c("train", "predict", "required")) + ) ps$values = list(mutation = list(), delete_originals = FALSE) super$initialize(id, ps, param_vals = param_vals) } diff --git a/R/PipeOpNMF.R b/R/PipeOpNMF.R index 22c425985..bb99d02b1 100644 --- a/R/PipeOpNMF.R +++ b/R/PipeOpNMF.R @@ -107,28 +107,26 @@ PipeOpNMF = R6Class("PipeOpNMF", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "nmf", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamInt$new("rank", lower = 1L, upper = Inf, tags = c("train", "nmf")), - ParamFct$new("method", tags = c("train", "nmf"), + ps = ps( + rank = p_int(lower = 1L, upper = Inf, tags = c("train", "nmf")), + method = p_fct(tags = c("train", "nmf"), levels = c("brunet", "lee", "ls-nmf", "nsNMF", "offset", "pe-nmf", "snmf/r", "snmf/l")), - ParamUty$new("seed", tags = c("train", "nmf")), + seed = p_uty(tags = c("train", "nmf")), # NOTE: rng missing, not well documented - ParamInt$new("nrun", lower = 1L, upper = Inf, default = 1L, tags = c("train", "nmf")), + nrun = p_int(lower = 1L, upper = Inf, default = 1L, tags = c("train", "nmf")), # NOTE: model missing, probably over the top here # the following are .options - ParamLgl$new("debug", default = FALSE, tags = c("train", 
"nmf.options")), - ParamLgl$new("keep.all", default = FALSE, tags = c("train", "nmf.options")), - ParamUty$new("parallel", default = TRUE, tags = c("train", "nmf.options")), - ParamUty$new("parallel.required", tags = c("train", "nmf.options")), - ParamLgl$new("shared.memory", tags = c("train", "nmf.options")), - ParamLgl$new("simplifyCB", default = TRUE, tags = c("train", "nmf.options")), - ParamLgl$new("track", default = FALSE, tags = c("train", "nmf.options")), - ParamUty$new("verbose", default = FALSE, tags = c("train", "nmf.options")), - ParamUty$new("pbackend", tags = c("train", "nmf")), # .pbackend - ParamUty$new("callback", tags = c("train", "nmf")) # .callback - )) - ps$add_dep("keep.all", on = "nrun", cond = CondLarger$new(1)) - ps$add_dep("callback", on = "keep.all", cond = CondEqual$new(TRUE)) + debug = p_lgl(default = FALSE, tags = c("train", "nmf.options")), + keep.all = p_lgl(default = FALSE, tags = c("train", "nmf.options")), + parallel = p_uty(default = TRUE, tags = c("train", "nmf.options")), + parallel.required = p_uty(tags = c("train", "nmf.options")), + shared.memory = p_lgl(tags = c("train", "nmf.options")), + simplifyCB = p_lgl(default = TRUE, tags = c("train", "nmf.options")), + track = p_lgl(default = FALSE, tags = c("train", "nmf.options")), + verbose = p_uty(default = FALSE, tags = c("train", "nmf.options")), + pbackend = p_uty(tags = c("train", "nmf")), # .pbackend + callback = p_uty(tags = c("train", "nmf"), depends = quote(keep.all == TRUE)) # .callback + ) ps$values = list(rank = 2L, method = "brunet", parallel = FALSE, parallel.required = FALSE) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer"), packages = c("MASS", "NMF")) } @@ -179,10 +177,11 @@ PipeOpNMF = R6Class("PipeOpNMF", mlr_pipeops$add("nmf", PipeOpNMF) -CondLarger = R6Class("CondLarger", inherit = Condition, - public = list( - initialize = function(rhs) super$initialize("larger", rhs), - test = function(x) !is.na(x) & x > 
self$rhs, - as_string = function(lhs_chr = "x") sprintf("%s > %s", lhs_chr, as.character(self$rhs)) - ) -) +# this is just a really bad idea +## CondLarger = R6Class("CondLarger", inherit = Condition, +## public = list( +## initialize = function(rhs) super$initialize("larger", rhs), +## test = function(x) !is.na(x) & x > self$rhs, +## as_string = function(lhs_chr = "x") sprintf("%s > %s", lhs_chr, as.character(self$rhs)) +## ) +## ) diff --git a/R/PipeOpPCA.R b/R/PipeOpPCA.R index 210bae161..606fc7b97 100644 --- a/R/PipeOpPCA.R +++ b/R/PipeOpPCA.R @@ -68,11 +68,11 @@ PipeOpPCA = R6Class("PipeOpPCA", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "pca", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamLgl$new("center", default = TRUE, tags = c("train", "pca")), - ParamLgl$new("scale.", default = FALSE, tags = c("train", "pca")), - ParamInt$new("rank.", default = NULL, lower = 1, upper = Inf, special_vals = list(NULL), tags = c("train", "pca")) - )) + ps = ps( + center = p_lgl(default = TRUE, tags = c("train", "pca")), + scale. = p_lgl(default = FALSE, tags = c("train", "pca")), + rank. = p_int(default = NULL, lower = 1, upper = Inf, special_vals = list(NULL), tags = c("train", "pca")) + ) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } ), diff --git a/R/PipeOpProxy.R b/R/PipeOpProxy.R index 36bf45bd3..5d7b91236 100644 --- a/R/PipeOpProxy.R +++ b/R/PipeOpProxy.R @@ -85,8 +85,8 @@ PipeOpProxy = R6Class("PipeOpProxy", assert_int(outnum, lower = 1L) # input can be a vararg input channel inname = if (innum) rep_suffix("input", innum) else "..." 
- ps = ParamSet$new(params = list( - ParamUty$new("content", tags = c("train", "predidct", "required"), custom_check = function(x) { + ps = ps( + content = p_uty(tags = c("train", "predidct", "required"), custom_check = function(x) { # content must be an object that can be coerced to a Graph and the output number must match tryCatch({ graph = as_graph(x) @@ -103,7 +103,7 @@ PipeOpProxy = R6Class("PipeOpProxy", }, error = function(error_condition) "`content` must be an object that can be converted to a Graph") }) - )) + ) ps$values = list(content = PipeOpFeatureUnion$new(innum = innum)) super$initialize(id, param_set = ps, param_vals = param_vals, input = data.table(name = inname, train = "*", predict = "*"), diff --git a/R/PipeOpQuantileBin.R b/R/PipeOpQuantileBin.R index a3eb6cffa..712657908 100644 --- a/R/PipeOpQuantileBin.R +++ b/R/PipeOpQuantileBin.R @@ -56,9 +56,9 @@ PipeOpQuantileBin = R6Class("PipeOpQuantileBin", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "quantilebin", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamInt$new("numsplits", lower = 2, special_vals = list(NULL), tags = "train") - )) + ps = ps( + numsplits = p_int(lower = 2, special_vals = list(NULL), tags = "train") + ) ps$values = list(numsplits = 2L) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "stats", feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpRandomProjection.R b/R/PipeOpRandomProjection.R index 65e7d1fa0..4cdd2bcbb 100644 --- a/R/PipeOpRandomProjection.R +++ b/R/PipeOpRandomProjection.R @@ -70,9 +70,9 @@ PipeOpRandomProjection = R6Class("PipeOpRandomProjection", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "randomprojection", param_vals = list()) { - ps = ParamSet$new(list( - ParamInt$new("rank", lower = 0, tags = "train") - )) + ps = ps( + rank = p_int(lower = 0, tags = "train") + ) ps$values = list(rank = 1) super$initialize(id, param_set = ps, param_vals = 
param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpRandomResponse.R b/R/PipeOpRandomResponse.R index c97d36d96..b883c8858 100644 --- a/R/PipeOpRandomResponse.R +++ b/R/PipeOpRandomResponse.R @@ -83,11 +83,10 @@ PipeOpRandomResponse = R6Class("PipeOpRandomResponse", inherit = PipeOp, public = list( initialize = function(id = "randomresponse", param_vals = list(), packages = character(0L)) { - ps = ParamSet$new(params = list( - ParamUty$new("rdistfun", tags = c("predict", "required"), custom_check = function(x) { + ps = ps( + rdistfun = p_uty(tags = c("predict", "required"), custom_check = crate(function(x) { check_function(x, args = c("n", "mean", "sd")) - }) - ) + })) ) ps$values = list(rdistfun = stats::rnorm) super$initialize(id = id, param_set = ps, param_vals = param_vals, packages = packages, diff --git a/R/PipeOpRemoveConstants.R b/R/PipeOpRemoveConstants.R index 6e481777c..fb22ea249 100644 --- a/R/PipeOpRemoveConstants.R +++ b/R/PipeOpRemoveConstants.R @@ -63,12 +63,12 @@ PipeOpRemoveConstants = R6Class("PipeOpRemoveConstants", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "removeconstants", param_vals = list()) { - ps = ParamSet$new(list( - ParamDbl$new("ratio", lower = 0, upper = 1, tags = c("train", "required", "constant_check")), - ParamDbl$new("rel_tol", lower = 0, tags = c("required", "constant_check", "train")), - ParamDbl$new("abs_tol", lower = 0, tags = c("required", "constant_check", "train")), - ParamLgl$new("na_ignore", tags = c("train", "required", "constant_check")) - )) + ps = ps( + ratio = p_dbl(lower = 0, upper = 1, tags = c("train", "required", "constant_check")), + rel_tol = p_dbl(lower = 0, tags = c("required", "constant_check", "train")), + abs_tol = p_dbl(lower = 0, tags = c("required", "constant_check", "train")), + na_ignore = p_lgl(tags = c("train", "required", "constant_check")) + ) ps$values = list(ratio = 0, rel_tol = 1e-8, abs_tol = 1e-8, na_ignore = TRUE) 
super$initialize(id, param_set = ps, param_vals = param_vals, tags = "robustify") } diff --git a/R/PipeOpRenameColumns.R b/R/PipeOpRenameColumns.R index 80869bdc4..bfdafbd4d 100644 --- a/R/PipeOpRenameColumns.R +++ b/R/PipeOpRenameColumns.R @@ -60,13 +60,13 @@ PipeOpRenameColumns = R6Class("PipeOpRenameColumns", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "renamecolumns", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamUty$new("renaming", tags = c("train", "predict", "required"), custom_check = function(x) { + ps = ps( + renaming = p_uty(tags = c("train", "predict", "required"), custom_check = function(x) { check_character(x, any.missing = FALSE, names = "strict") %check&&% check_names(x, type = "strict") }), - ParamLgl$new("ignore_missing", tags = c("train", "predict", "required")) - )) + ignore_missing = p_lgl(tags = c("train", "predict", "required")) + ) ps$values = list(renaming = character(0), ignore_missing = FALSE) super$initialize(id, ps, param_vals = param_vals, can_subset_cols = FALSE) } diff --git a/R/PipeOpScale.R b/R/PipeOpScale.R index e4a0e2e34..8a5636c65 100644 --- a/R/PipeOpScale.R +++ b/R/PipeOpScale.R @@ -74,11 +74,11 @@ PipeOpScale = R6Class("PipeOpScale", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "scale", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamLgl$new("center", default = TRUE, tags = c("train", "scale")), - ParamLgl$new("scale", default = TRUE, tags = c("train", "scale")), - ParamLgl$new("robust", tags = c("train", "required")) - )) + ps = ps( + center = p_lgl(default = TRUE, tags = c("train", "scale")), + scale = p_lgl(default = TRUE, tags = c("train", "scale")), + robust = p_lgl(tags = c("train", "required")) + ) ps$values = list(robust = FALSE) super$initialize(id = id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpScaleMaxAbs.R b/R/PipeOpScaleMaxAbs.R index 46b1f67dc..a4abe2e32 
100644 --- a/R/PipeOpScaleMaxAbs.R +++ b/R/PipeOpScaleMaxAbs.R @@ -54,9 +54,9 @@ PipeOpScaleMaxAbs = R6Class("PipeOpScaleMaxAbs", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "scalemaxabs", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamDbl$new("maxabs", lower = 0, tags = c("required", "train", "predict")) - )) + ps = ps( + maxabs = p_dbl(lower = 0, tags = c("required", "train", "predict")) + ) ps$values = list(maxabs = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpScaleRange.R b/R/PipeOpScaleRange.R index 6a455c20e..e5f547c24 100644 --- a/R/PipeOpScaleRange.R +++ b/R/PipeOpScaleRange.R @@ -59,10 +59,10 @@ PipeOpScaleRange = R6Class("PipeOpScaleRange", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "scalerange", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamDbl$new("lower", tags = c("required", "train")), - ParamDbl$new("upper", tags = c("required", "train")) - )) + ps = ps( + lower = p_dbl(tags = c("required", "train")), + upper = p_dbl(tags = c("required", "train")) + ) ps$values = list(lower = 0, upper = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpSelect.R b/R/PipeOpSelect.R index 57f777e09..92721704b 100644 --- a/R/PipeOpSelect.R +++ b/R/PipeOpSelect.R @@ -69,9 +69,9 @@ PipeOpSelect = R6Class("PipeOpSelect", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "select", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamUty$new("selector", custom_check = check_function, tags = c("train", "required")) - )) + ps = ps( + selector = p_uty(custom_check = check_function, tags = c("train", "required")) + ) ps$values = list(selector = selector_all()) super$initialize(id, ps, param_vals = param_vals, tags = "feature selection") } diff --git a/R/PipeOpSmote.R 
b/R/PipeOpSmote.R index e9f1e2c01..9e512bbd0 100644 --- a/R/PipeOpSmote.R +++ b/R/PipeOpSmote.R @@ -71,12 +71,12 @@ PipeOpSmote = R6Class("PipeOpSmote", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "smote", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamInt$new("K", lower = 1, default = 5, tags = c("train", "smote")), + ps = ps( + K = p_int(lower = 1, default = 5, tags = c("train", "smote")), # dup_size = 0 leads to behaviour different from 1, 2, 3, ..., because it means "autodetect", # so it is a 'special_vals'. - ParamInt$new("dup_size", lower = 1, default = 0, special_vals = list(0), tags = c("train", "smote")) - )) + dup_size = p_int(lower = 1, default = 0, special_vals = list(0), tags = c("train", "smote")) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "smotefamily", can_subset_cols = FALSE, tags = "imbalanced data") } diff --git a/R/PipeOpSpatialSign.R b/R/PipeOpSpatialSign.R index 9ef2a1ade..0ac0559d4 100644 --- a/R/PipeOpSpatialSign.R +++ b/R/PipeOpSpatialSign.R @@ -55,10 +55,10 @@ PipeOpSpatialSign = R6Class("PipeOpSpatialSign", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "spatialsign", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamDbl$new("length", tags = c("train", "predict"), lower = 0), - ParamDbl$new("norm", tags = c("train", "predict"), lower = 0) - )) + ps = ps( + length = p_dbl(tags = c("train", "predict"), lower = 0), + norm = p_dbl(tags = c("train", "predict"), lower = 0) + ) ps$values = list(norm = 2, length = 1) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer")) } diff --git a/R/PipeOpSubsample.R b/R/PipeOpSubsample.R index 38e1fb5d7..3d657c2bc 100644 --- a/R/PipeOpSubsample.R +++ b/R/PipeOpSubsample.R @@ -64,11 +64,11 @@ PipeOpSubsample = R6Class("PipeOpSubsample", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "subsample", param_vals = list()) 
{ - ps = ParamSet$new(params = list( - ParamDbl$new("frac", lower = 0, upper = Inf, tags = "train"), - ParamLgl$new("stratify", tags = "train"), - ParamLgl$new("replace", tags = "train") - )) + ps = ps( + frac = p_dbl(lower = 0, upper = Inf, tags = "train"), + stratify = p_lgl(tags = "train"), + replace = p_lgl(tags = "train") + ) ps$values = list(frac = 1 - exp(-1), stratify = FALSE, replace = FALSE) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE) } diff --git a/R/PipeOpTaskPreproc.R b/R/PipeOpTaskPreproc.R index 1e0381ec1..4739b37cf 100644 --- a/R/PipeOpTaskPreproc.R +++ b/R/PipeOpTaskPreproc.R @@ -36,7 +36,7 @@ #' #' @section Construction: #' ``` -#' PipeOpTaskPreproc$new(id, param_set = ParamSet$new(), param_vals = list(), can_subset_cols = TRUE, +#' PipeOpTaskPreproc$new(id, param_set = ps(), param_vals = list(), can_subset_cols = TRUE, #' packages = character(0), task_type = "Task", tags = NULL, feature_types = mlr_reflections$task_feature_types) #' ``` #' @@ -168,14 +168,18 @@ PipeOpTaskPreproc = R6Class("PipeOpTaskPreproc", inherit = PipeOp, public = list( - initialize = function(id, param_set = ParamSet$new(), param_vals = list(), can_subset_cols = TRUE, + initialize = function(id, param_set = ps(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task", tags = NULL, feature_types = mlr_reflections$task_feature_types) { if (can_subset_cols) { - acp = ParamUty$new("affect_columns", custom_check = check_function_or_null, default = selector_all(), tags = "train") + affectcols_ps = ps(affect_columns = p_uty(custom_check = check_function_or_null, default = selector_all(), tags = "train")) if (inherits(param_set, "ParamSet")) { - param_set$add(acp) + if (paradox_info$is_old) { + lapply(affectcols_ps$params, param_set$add) + } else { + param_set = c(param_set, affectcols_ps) + } } else { - private$.affectcols_ps = ParamSet$new(list(acp)) + private$.affectcols_ps = affectcols_ps param_set = 
c(param_set, alist(private$.affectcols_ps)) } } @@ -317,7 +321,7 @@ PipeOpTaskPreproc = R6Class("PipeOpTaskPreproc", #' #' @section Construction: #' ``` -#' PipeOpTaskPreprocSimple$new(id, param_set = ParamSet$new(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task") +#' PipeOpTaskPreprocSimple$new(id, param_set = ps(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task") #' ``` #' (Construction is identical to [`PipeOpTaskPreproc`].) #' diff --git a/R/PipeOpTextVectorizer.R b/R/PipeOpTextVectorizer.R index 4d57c4b76..8550fdb22 100644 --- a/R/PipeOpTextVectorizer.R +++ b/R/PipeOpTextVectorizer.R @@ -165,61 +165,54 @@ PipeOpTextVectorizer = R6Class("PipeOpTextVectorizer", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "textvectorizer", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamFct$new("stopwords_language", tags = c("train", "predict"), + ps = ps( + stopwords_language = p_fct(tags = c("train", "predict"), levels = c("da", "de", "en", "es", "fi", "fr", "hu", "ir", "it", "nl", "no", "pt", "ro", "ru", "sv" , "smart", "none")), - ParamUty$new("extra_stopwords", tags = c("train", "predict"), custom_check = check_character), + extra_stopwords = p_uty(tags = c("train", "predict"), custom_check = check_character), - ParamLgl$new("tolower", default = TRUE, tags = c("train", "predict", "dfm")), - ParamLgl$new("stem", default = FALSE, tags = c("train", "predict", "dfm")), + tolower = p_lgl(default = TRUE, tags = c("train", "predict", "dfm")), + stem = p_lgl(default = FALSE, tags = c("train", "predict", "dfm")), - ParamFct$new("what", default = "word", tags = c("train", "predict", "tokenizer"), + what = p_fct(default = "word", tags = c("train", "predict", "tokenizer"), levels = c("word", "word1", "fasterword", "fastestword", "character", "sentence")), - ParamLgl$new("remove_punct", default = FALSE, tags = c("train", "predict", "tokenizer")), - 
ParamLgl$new("remove_symbols", default = FALSE, tags = c("train", "predict", "tokenizer")), - ParamLgl$new("remove_numbers", default = FALSE, tags = c("train", "predict", "tokenizer")), - ParamLgl$new("remove_url", default = FALSE, tags = c("train", "predict", "tokenizer")), - ParamLgl$new("remove_separators", default = TRUE, tags = c("train", "predict", "tokenizer")), - ParamLgl$new("split_hyphens", default = FALSE, tags = c("train", "predict", "tokenizer")), + remove_punct = p_lgl(default = FALSE, tags = c("train", "predict", "tokenizer")), + remove_symbols = p_lgl(default = FALSE, tags = c("train", "predict", "tokenizer")), + remove_numbers = p_lgl(default = FALSE, tags = c("train", "predict", "tokenizer")), + remove_url = p_lgl(default = FALSE, tags = c("train", "predict", "tokenizer")), + remove_separators = p_lgl(default = TRUE, tags = c("train", "predict", "tokenizer")), + split_hyphens = p_lgl(default = FALSE, tags = c("train", "predict", "tokenizer")), - ParamUty$new("n", default = 2, tags = c("train", "predict", "ngrams"), custom_check = curry(check_integerish, min.len = 1, lower = 1, any.missing = FALSE)), - ParamUty$new("skip", default = 0, tags = c("train", "predict", "ngrams"), custom_check = curry(check_integerish, min.len = 1, lower = 0, any.missing = FALSE)), + n = p_uty(default = 2, tags = c("train", "predict", "ngrams"), custom_check = curry(check_integerish, min.len = 1, lower = 1, any.missing = FALSE)), + skip = p_uty(default = 0, tags = c("train", "predict", "ngrams"), custom_check = curry(check_integerish, min.len = 1, lower = 0, any.missing = FALSE)), - ParamDbl$new("sparsity", lower = 0, upper = 1, default = NULL, - tags = c("train", "dfm_trim"), special_vals = list(NULL)), - ParamFct$new("termfreq_type", default = "count", tags = c("train", "dfm_trim"), + sparsity = p_dbl(lower = 0, upper = 1, default = NULL, + tags = c("train", "dfm_trim"), special_vals = list(NULL), + depends = quote(return_type == "bow")), + termfreq_type = 
p_fct(default = "count", tags = c("train", "dfm_trim"), levels = c("count", "prop", "rank", "quantile")), - ParamDbl$new("min_termfreq", lower = 0, default = NULL, + min_termfreq = p_dbl(lower = 0, default = NULL, tags = c("train", "dfm_trim"), special_vals = list(NULL)), - ParamDbl$new("max_termfreq", lower = 0, default = NULL, + max_termfreq = p_dbl(lower = 0, default = NULL, tags = c("train", "dfm_trim"), special_vals = list(NULL)), - ParamFct$new("scheme_df", default = "count", tags = c("train", "docfreq"), + scheme_df = p_fct(default = "count", tags = c("train", "docfreq"), levels = c("count", "inverse", "inversemax", "inverseprob", "unary")), - ParamDbl$new("smoothing_df", lower = 0, default = 0, tags = c("train", "docfreq")), - ParamDbl$new("k_df", lower = 0, tags = c("train", "docfreq")), - ParamDbl$new("threshold_df", lower = 0, default = 0, tags = c("train", "docfreq")), - ParamDbl$new("base_df", lower = 0, default = 10, tags = c("train", "docfreq")), + smoothing_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq"), depends = quote(scheme_df %in% c("inverse", "inversemax", "inverseprob"))), + k_df = p_dbl(lower = 0, tags = c("train", "docfreq"), depends = quote(scheme_df %in% c("inverse", "inversemax", "inverseprob"))), + threshold_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq"), depends = quote(scheme_df == "count")), + base_df = p_dbl(lower = 0, default = 10, tags = c("train", "docfreq"), + depends = quote(scheme_df %in% c("inverse", "inversemax", "inverseprob"))), - ParamFct$new("scheme_tf", default = "count", tags = c("train", "predict", "dfm_weight"), + scheme_tf = p_fct(default = "count", tags = c("train", "predict", "dfm_weight"), depends = quote(return_type == "bow"), levels = c("count", "prop", "propmax", "logcount", "boolean", "augmented", "logave")), - ParamDbl$new("k_tf", lower = 0, upper = 1, tags = c("train", "predict", "dfm_weight")), - ParamDbl$new("base_tf", lower = 0, default = 10, tags = c("train", "predict", 
"dfm_weight")), + k_tf = p_dbl(lower = 0, upper = 1, tags = c("train", "predict", "dfm_weight"), depends = quote(scheme_tf == "augmented")), + base_tf = p_dbl(lower = 0, default = 10, tags = c("train", "predict", "dfm_weight"), depends = quote(scheme_tf %in% c("logcount", "logave"))), - ParamFct$new("return_type", levels = c("bow", "integer_sequence", "factor_sequence"), tags = c("train", "predict")), - ParamInt$new("sequence_length", default = 0, lower = 0, upper = Inf, tags = c("train", "predict", "integer_sequence")) - ))$ - add_dep("base_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("smoothing_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("k_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("base_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("threshold_df", "scheme_df", CondEqual$new("count"))$ - add_dep("k_tf", "scheme_tf", CondEqual$new("augmented"))$ - add_dep("base_tf", "scheme_tf", CondAnyOf$new(c("logcount", "logave")))$ - add_dep("scheme_tf", "return_type", CondEqual$new("bow"))$ - add_dep("sparsity", "return_type", CondEqual$new("bow"))$ - add_dep("sequence_length", "return_type", CondAnyOf$new(c("integer_sequence", "factor_sequence"))) + return_type = p_fct(levels = c("bow", "integer_sequence", "factor_sequence"), tags = c("train", "predict")), + sequence_length = p_int(default = 0, lower = 0, upper = Inf, tags = c("train", "predict", "integer_sequence"), + depends = quote(return_type %in% c("integer_sequence", "factor_sequence"))) + ) ps$values = list(stopwords_language = "smart", extra_stopwords = character(0), n = 1, scheme_df = "unary", return_type = "bow") super$initialize(id = id, param_set = ps, param_vals = param_vals, packages = c("quanteda", "stopwords"), feature_types = "character") diff --git a/R/PipeOpThreshold.R b/R/PipeOpThreshold.R index 61db915ff..3f9dae220 100644 --- 
a/R/PipeOpThreshold.R +++ b/R/PipeOpThreshold.R @@ -56,8 +56,8 @@ PipeOpThreshold = R6Class("PipeOpThreshold", inherit = PipeOp, public = list( initialize = function(id = "threshold", param_vals = list()) { - param_set = ParamSet$new() - param_set$add(ParamUty$new("thresholds", custom_check = check_numeric_valid_threshold, tags = "predict")) + param_set = ps(thresholds = p_uty(custom_check = check_numeric_valid_threshold, tags = "predict")) + param_set$values$thresholds = 0.5 super$initialize(id, param_set = param_set, param_vals = param_vals, packages = character(0), input = data.table(name = "input", train = "NULL", predict = "PredictionClassif"), diff --git a/R/PipeOpTrafo.R b/R/PipeOpTrafo.R index 54365b3db..101256cd0 100644 --- a/R/PipeOpTrafo.R +++ b/R/PipeOpTrafo.R @@ -15,7 +15,7 @@ #' #' @section Construction: #' ``` -#' PipeOpTargetTrafo$new(id, param_set = ParamSet$new(), param_vals = list() packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) +#' PipeOpTargetTrafo$new(id, param_set = ps(), param_vals = list() packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) #' ``` #' #' * `id` :: `character(1)`\cr @@ -118,7 +118,7 @@ PipeOpTargetTrafo = R6Class("PipeOpTargetTrafo", inherit = PipeOp, public = list( - initialize = function(id, param_set = ParamSet$new(), param_vals = list(), packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) { + initialize = function(id, param_set = ps(), param_vals = list(), packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) { super$initialize(id = id, param_set = param_set, param_vals = param_vals, input = data.table(name = "input", train = task_type_in, predict = task_type_in), output = data.table(name = c("fun", "output"), train = c("NULL", task_type_out), predict = c("function", task_type_out)), @@ -348,10 +348,10 @@ PipeOpTargetMutate = R6Class("PipeOpTargetMutate", public = 
list( initialize = function(id = "targetmutate", param_vals = list(), new_task_type = NULL) { private$.new_task_type = assert_choice(new_task_type, mlr_reflections$task_types$type, null.ok = TRUE) - ps = ParamSet$new(list( - ParamUty$new("trafo", tags = c("train", "predict"), custom_check = function(x) check_function(x, nargs = 1L)), - ParamUty$new("inverter", tags = "predict", custom_check = function(x) check_function(x, nargs = 1L)) - )) + ps = ps( + trafo = p_uty(tags = c("train", "predict"), custom_check = function(x) check_function(x, nargs = 1L)), + inverter = p_uty(tags = "predict", custom_check = function(x) check_function(x, nargs = 1L)) + ) # We could add a condition here for new_task_type on trafo and inverter when mlr-org/paradox#278 has an answer. # HOWEVER conditions are broken in paradox, it is a terrible idea to use them in PipeOps, # see https://github.com/mlr-org/paradox/issues/216 and related comment in PipeOpLearnerCV @@ -457,10 +457,10 @@ PipeOpTargetTrafoScaleRange = R6Class("PipeOpTargetTrafoScaleRange", inherit = PipeOpTargetTrafo, public = list( initialize = function(id = "targettrafoscalerange", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamDbl$new("lower", tags = c("required", "train")), - ParamDbl$new("upper", tags = c("required", "train")) - )) + ps = ps( + lower = p_dbl(tags = c("required", "train")), + upper = p_dbl(tags = c("required", "train")) + ) ps$values = list(lower = 0, upper = 1) super$initialize(id = id, param_set = ps, param_vals = param_vals, task_type_in = "TaskRegr") } @@ -515,7 +515,7 @@ mlr_pipeops$add("targettrafoscalerange", PipeOpTargetTrafoScaleRange) #' #' @section Construction: #' ``` -#' PipeOpUpdateTarget$new(id, param_set = ParamSet$new(), +#' PipeOpUpdateTarget$new(id, param_set = ps(), #' param_vals = list(), packages = character(0)) #' ``` #' @@ -565,12 +565,11 @@ PipeOpUpdateTarget = R6Class("PipeOpUpdateTarget", inherit = PipeOp, public = list( initialize = function(id = 
"update_target", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamUty$new("trafo", tags = c("train", "predict"), custom_check = function(x) check_function(x, nargs = 1L)), - ParamUty$new("new_target_name", tags = c("train", "predict"), custom_check = function(x) check_character(x, any.missing = FALSE, len = 1L)), - ParamUty$new("new_task_type", tags = c("train", "predict"), custom_check = function(x) check_choice(x, choices = mlr_reflections$task_types$type)), - ParamLgl$new("drop_original_target", tags = c("train", "predict")) - ) + ps = ps( + trafo = p_uty(tags = c("train", "predict"), custom_check = function(x) check_function(x, nargs = 1L)), + new_target_name = p_uty(tags = c("train", "predict"), custom_check = function(x) check_character(x, any.missing = FALSE, len = 1L)), + new_task_type = p_uty(tags = c("train", "predict"), custom_check = function(x) check_choice(x, choices = mlr_reflections$task_types$type)), + drop_original_target = p_lgl(tags = c("train", "predict")) ) ps$values = list(trafo = identity, drop_original_target = TRUE) super$initialize(id = id, param_set = ps, param_vals = param_vals, diff --git a/R/PipeOpTuneThreshold.R b/R/PipeOpTuneThreshold.R index 311f5f433..e4891eb6f 100644 --- a/R/PipeOpTuneThreshold.R +++ b/R/PipeOpTuneThreshold.R @@ -75,12 +75,12 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold", public = list( initialize = function(id = "tunethreshold", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamUty$new("measure", custom_check = check_class_or_character("Measure", mlr_measures), tags = "train"), - ParamUty$new("optimizer", custom_check = check_optimizer, tags = "train"), - ParamUty$new("log_level", tags = "train", + ps = ps( + measure = p_uty(custom_check = check_class_or_character("Measure", mlr_measures), tags = "train"), + optimizer = p_uty(custom_check = check_optimizer, tags = "train"), + log_level = p_uty(tags = "train", function(x) check_string(x) %check||% check_integerish(x)) - )) 
+ ) ps$values = list(measure = "classif.ce", optimizer = "gensa", log_level = "warn") super$initialize(id, param_set = ps, param_vals = param_vals, packages = "bbotk", input = data.table(name = "input", train = "Task", predict = "Task"), @@ -120,7 +120,8 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold", ps = private$.make_param_set(pred) measure = self$param_set$values$measure if (is.character(measure)) measure = msr(measure) else measure - codomain = ParamSet$new(list(ParamDbl$new(id = measure$id, tags = ifelse(measure$minimize, "minimize", "maximize")))) + codomain = do.call(paradox::ps, structure(list(p_dbl(tags = ifelse(measure$minimize, "minimize", "maximize"))), names = measure$id)) + objfun = bbotk::ObjectiveRFun$new( fun = function(xs) private$.objfun(xs, pred = pred, measure = measure), domain = ps, codomain = codomain diff --git a/R/PipeOpVtreat.R b/R/PipeOpVtreat.R index acb46899e..677fbc7dd 100644 --- a/R/PipeOpVtreat.R +++ b/R/PipeOpVtreat.R @@ -128,39 +128,38 @@ PipeOpVtreat = R6Class("PipeOpVtreat", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "vtreat", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamLgl$new("recommended", tags = c("train", "predict")), - ParamUty$new("cols_to_copy", custom_check = checkmate::check_function, tags = c("train", "predict")), + ps = ps( + recommended = p_lgl(tags = c("train", "predict")), + cols_to_copy = p_uty(custom_check = checkmate::check_function, tags = c("train", "predict")), # tags stand for: regression vtreat::regression_parameters() / classification vtreat::classification_parameters() / multinomial vtreat::multinomial_parameters() - ParamDbl$new("minFraction", lower = 0, upper = 1, default = 0.02, tags = c("train", "regression", "classification", "multinomial")), - ParamDbl$new("smFactor", lower = 0, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), - ParamInt$new("rareCount", lower = 0L, upper = Inf, default = 0, tags = 
c("train", "regression", "classification", "multinomial")), - ParamDbl$new("rareSig", lower = 0, upper = 1, special_vals = list(NULL), tags = c("train", "regression", "classification", "multinomial")), # default NULL for regression, classification, 1 for multinomial - ParamDbl$new("collarProb", lower = 0, upper = 1, default = 0, tags = c("train", "regression", "classification", "multinomial")), - ParamLgl$new("doCollar", default = FALSE, tags = c("train", "regression", "classification", "multinomial")), - ParamUty$new("codeRestriction", default = NULL, custom_check = function(x) checkmate::check_character(x, any.missing = FALSE, null.ok = TRUE), + minFraction = p_dbl(lower = 0, upper = 1, default = 0.02, tags = c("train", "regression", "classification", "multinomial")), + smFactor = p_dbl(lower = 0, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), + rareCount = p_int(lower = 0L, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), + rareSig = p_dbl(lower = 0, upper = 1, special_vals = list(NULL), tags = c("train", "regression", "classification", "multinomial")), # default NULL for regression, classification, 1 for multinomial + collarProb = p_dbl(lower = 0, upper = 1, default = 0, tags = c("train", "regression", "classification", "multinomial"), depends = quote(doCollar == TRUE)), + doCollar = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), + codeRestriction = p_uty(default = NULL, custom_check = function(x) checkmate::check_character(x, any.missing = FALSE, null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), - ParamUty$new("customCoders", default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), - ParamUty$new("splitFunction", default = NULL, custom_check = function(x) checkmate::check_function(x, args = c("nSplits", 
"nRows", "dframe", "y"), null.ok = TRUE), + customCoders = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), + splitFunction = p_uty(default = NULL, custom_check = function(x) checkmate::check_function(x, args = c("nSplits", "nRows", "dframe", "y"), null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), - ParamInt$new("ncross", lower = 2L, upper = Inf, default = 3L, tags = c("train", "regression", "classification", "multinomial")), - ParamLgl$new("forceSplit", default = FALSE, tags = c("train", "regression", "classification", "multinomial")), - ParamLgl$new("catScaling", tags = c("train", "regression", "classification", "multinomial")), # default TRUE for regression, classification, FALSE for multinomial - ParamLgl$new("verbose", default = FALSE, tags = c("train", "regression", "classification", "multinomial")), - ParamLgl$new("use_paralell", default = TRUE, tags = c("train", "regression", "classification", "multinomial")), - ParamUty$new("missingness_imputation", default = NULL, custom_check = function(x) checkmate::check_function(x, args = c("values", "weights"), null.ok = TRUE), + ncross = p_int(lower = 2L, upper = Inf, default = 3L, tags = c("train", "regression", "classification", "multinomial")), + forceSplit = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), + catScaling = p_lgl(tags = c("train", "regression", "classification", "multinomial")), # default TRUE for regression, classification, FALSE for multinomial + verbose = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), + use_paralell = p_lgl(default = TRUE, tags = c("train", "regression", "classification", "multinomial")), + missingness_imputation = p_uty(default = NULL, custom_check = function(x) checkmate::check_function(x, args = c("values", "weights"), null.ok = TRUE), tags = c("train", 
"regression", "classification", "multinomial")), - ParamDbl$new("pruneSig", lower = 0, upper = 1, special_vals = list(NULL), default = NULL, tags = c("train", "regression", "classification")), - ParamLgl$new("scale", default = FALSE, tags = c("train", "regression", "classification", "multinomial")), - ParamUty$new("varRestriction", default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification")), - ParamUty$new("trackedValues", default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification")), + pruneSig = p_dbl(lower = 0, upper = 1, special_vals = list(NULL), default = NULL, tags = c("train", "regression", "classification")), + scale = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), + varRestriction = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification")), + trackedValues = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "regression", "classification")), # NOTE: check_for_duplicate_frames not needed - ParamUty$new("y_dependent_treatments", default = "catB", custom_check = function(x) checkmate::check_character(x, any.missing = FALSE), tags = c("train", "multinomial")), + y_dependent_treatments = p_uty(default = "catB", custom_check = function(x) checkmate::check_character(x, any.missing = FALSE), tags = c("train", "multinomial")), # NOTE: imputation_map is also in multinomial_parameters(); this is redundant so only include it here - ParamUty$new("imputation_map", default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "predict")) + imputation_map = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "predict")) # NOTE: parallelCluster 
missing intentionally and will be set to NULL - )) - ps$add_dep("collarProb", on = "doCollar", cond = CondEqual$new(TRUE)) + ) ps$values = list(recommended = TRUE, cols_to_copy = selector_none()) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "vtreat", tags = c("encode", "missings")) } diff --git a/R/PipeOpYeoJohnson.R b/R/PipeOpYeoJohnson.R index 97923840d..99d309f7c 100644 --- a/R/PipeOpYeoJohnson.R +++ b/R/PipeOpYeoJohnson.R @@ -67,12 +67,12 @@ PipeOpYeoJohnson = R6Class("PipeOpYeoJohnson", inherit = PipeOpTaskPreproc, public = list( initialize = function(id = "yeojohnson", param_vals = list()) { - ps = ParamSet$new(params = list( - ParamDbl$new("eps", default = 0.001, lower = 0, tags = c("train", "yj")), - ParamLgl$new("standardize", default = TRUE, tags = c("train", "yj")), - ParamDbl$new("lower", tags = c("train", "yj")), - ParamDbl$new("upper", tags = c("train", "yj")) - )) + ps = ps( + eps = p_dbl(default = 0.001, lower = 0, tags = c("train", "yj")), + standardize = p_lgl(default = TRUE, tags = c("train", "yj")), + lower = p_dbl(tags = c("train", "yj")), + upper = p_dbl(tags = c("train", "yj")) + ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "bestNormalize", feature_types = c("numeric", "integer")) } diff --git a/R/assert_graph.R b/R/assert_graph.R index 03c723e14..fadaa22b7 100644 --- a/R/assert_graph.R +++ b/R/assert_graph.R @@ -39,8 +39,10 @@ as_graph = function(x, clone = FALSE) { } #' @export -as_graph.default = function(x, clone = FALSE) { - Graph$new()$add_pipeop(x) # add_pipeop always clones and checks automatically for convertability +as_graph.default = function(x, clone = TRUE) { + # different default than other methods for backwards compatibility + # previously $add_pipeop() always cloned its input + Graph$new()$add_pipeop(x, clone = clone) } #' @export diff --git a/R/pipeline_bagging.R b/R/pipeline_bagging.R index afcf0c7f9..31b743d32 100644 --- a/R/pipeline_bagging.R +++ 
b/R/pipeline_bagging.R @@ -28,6 +28,9 @@ #' predictions respectively. #' If `NULL` (default), no averager is added to the end of the graph. #' Note that setting `collect_multipliciy = TRUE` during construction of the averager is required. +#' @param replace `logical(1)` \cr +#' Whether to sample with replacement. +#' Default `FALSE`. #' @return [`Graph`] #' @export #' @examples @@ -36,9 +39,14 @@ #' lrn_po = po("learner", lrn("regr.rpart")) #' task = mlr_tasks$get("boston_housing") #' gr = pipeline_bagging(lrn_po, 3, averager = po("regravg", collect_multiplicity = TRUE)) -#' resample(task, GraphLearner$new(gr), rsmp("holdout")) +#' resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() +#' +#' # The original bagging method uses boosting by sampling with replacement. +#' gr = ppl("bagging", lrn_po, frac = 1, replace = TRUE, +#' averager = po("regravg", collect_multiplicity = TRUE)) +#' resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() #' } -pipeline_bagging = function(graph, iterations = 10, frac = 0.7, averager = NULL) { +pipeline_bagging = function(graph, iterations = 10, frac = 0.7, averager = NULL, replace = FALSE) { g = as_graph(graph) assert_count(iterations) assert_number(frac, lower = 0, upper = 1) @@ -50,7 +58,7 @@ pipeline_bagging = function(graph, iterations = 10, frac = 0.7, averager = NULL) } po("replicate", param_vals = list(reps = iterations)) %>>!% - po("subsample", param_vals = list(frac = frac)) %>>!% + po("subsample", param_vals = list(frac = frac, replace = replace)) %>>!% g %>>!% averager } diff --git a/R/pipeline_robustify.R b/R/pipeline_robustify.R index 1d9774627..abb386c4b 100644 --- a/R/pipeline_robustify.R +++ b/R/pipeline_robustify.R @@ -170,7 +170,7 @@ pipeline_robustify = function(task = NULL, learner = NULL, imputing, po("missind", affect_columns = selector_type(c("numeric", "integer", "logical")), type = if (missind_numeric) "numeric" else "factor") )), - if (has_numbers || has_logicals) po("featureunion"), 
+ if (has_numbers || has_logicals) po("featureunion", id = "featureunion_robustify"), if (has_factorials) po("imputeoor") ) diff --git a/R/pipeline_stacking.R b/R/pipeline_stacking.R index cb1512126..4ac1f0a0d 100644 --- a/R/pipeline_stacking.R +++ b/R/pipeline_stacking.R @@ -53,7 +53,7 @@ pipeline_stacking = function(base_learners, super_learner, method = "cv", folds if (use_features) base_learners_cv = c(base_learners_cv, po("nop")) gunion(base_learners_cv, in_place = TRUE) %>>!% - po("featureunion") %>>!% + po("featureunion", id = "featureunion_stacking") %>>!% super_learner } diff --git a/R/ppl.R b/R/ppl.R index 113f80420..1754fb9de 100644 --- a/R/ppl.R +++ b/R/ppl.R @@ -3,7 +3,7 @@ #' @description #' Creates a [`Graph`] from [`mlr_graphs`] from given ID #' -#' `ppl()` taks a `charcter(1)` and returns a [`Graph`]. `ppls()` takes a `character` +#' `ppl()` taks a `character(1)` and returns a [`Graph`]. `ppls()` takes a `character` #' vector of any list and returns a `list` of possibly muliple [`Graph`]s. #' #' @param .key `[character(1)]`\cr diff --git a/R/zzz.R b/R/zzz.R index 0573770ba..cf50d0dfe 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -18,6 +18,8 @@ register_mlr3 = function() { "multiplicity"))) } +paradox_info <- list2env(list(is_old = FALSE), parent = emptyenv()) + .onLoad = function(libname, pkgname) { # nocov start register_mlr3() setHook(packageEvent("mlr3", "onLoad"), function(...) 
register_mlr3(), action = "append") @@ -27,6 +29,7 @@ register_mlr3 = function() { if (Sys.getenv("IN_PKGDOWN") == "true") { lg$set_threshold("warn") } + paradox_info$is_old = "set_id" %in% names(ps()) } # nocov end .onUnload = function(libpath) { # nocov start diff --git a/man/Graph.Rd b/man/Graph.Rd index f45cb794e..1a566db64 100644 --- a/man/Graph.Rd +++ b/man/Graph.Rd @@ -69,7 +69,7 @@ Stores a checksum calculated on the \code{\link{Graph}} configuration, which inc \item \code{phash} :: \code{character(1)} \cr Stores a checksum calculated on the \code{\link{Graph}} configuration, which includes all \code{\link{PipeOp}} hashes \emph{except} their \verb{$param_set$values}, and a hash of \verb{$edges}. -\item \code{keep_results} :: \code{logical(1)} \cr +\item \code{keep_results} :: \code{logical(1)}\cr Whether to store intermediate results in the \code{\link{PipeOp}}'s \verb{$.result} slot, mostly for debugging purposes. Default \code{FALSE}. \item \code{man} :: \code{character(1)}\cr Identifying string of the help page that shows with \code{help()}. @@ -83,13 +83,14 @@ Identifying string of the help page that shows with \code{help()}. (\code{logical(1)}) -> \code{character} \cr Get IDs of all \code{\link{PipeOp}}s. This is in order that \code{\link{PipeOp}}s were added if \code{sorted} is \code{FALSE}, and topologically sorted if \code{sorted} is \code{TRUE}. -\item \code{add_pipeop(op)} \cr -(\code{\link{PipeOp}} | \code{\link[mlr3:Learner]{Learner}} | \code{\link[mlr3filters:Filter]{Filter}} | \code{...}) -> \code{self} \cr +\item \code{add_pipeop(op, clone = TRUE)} \cr +(\code{\link{PipeOp}} | \code{\link[mlr3:Learner]{Learner}} | \code{\link[mlr3filters:Filter]{Filter}} | \code{...}, \code{logical(1)}) -> \code{self} \cr Mutates \code{\link{Graph}} by adding a \code{\link{PipeOp}} to the \code{\link{Graph}}. 
This does not add any edges, so the new \code{\link{PipeOp}} will not be connected within the \code{\link{Graph}} at first.\cr Instead of supplying a \code{\link{PipeOp}} directly, an object that can naturally be converted to a \code{\link{PipeOp}} can also be supplied, e.g. a \code{\link[mlr3:Learner]{Learner}} or a \code{\link[mlr3filters:Filter]{Filter}}; see \code{\link[=as_pipeop]{as_pipeop()}}. -The argument given as \code{op} is always cloned; to access a \code{Graph}'s \code{\link{PipeOp}}s by-reference, use \verb{$pipeops}.\cr +The argument given as \code{op} is cloned if \code{clone} is \code{TRUE} (default); to access a \code{Graph}'s \code{\link{PipeOp}}s +by-reference, use \verb{$pipeops}.\cr Note that \verb{$add_pipeop()} is a relatively low-level operation, it is recommended to build graphs using \code{\link{\%>>\%}}. \item \code{add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)} \cr (\code{character(1)}, \code{character(1)}, diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index 9d485952d..9d64936f4 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -39,7 +39,7 @@ is not intended to be instantiated. \section{Construction}{ -\if{html}{\out{
}}\preformatted{PipeOp$new(id, param_set = ParamSet$new(), param_vals = list(), input, output, packages = character(0), tags = character(0)) +\if{html}{\out{
}}\preformatted{PipeOp$new(id, param_set = ps(), param_vals = list(), input, output, packages = character(0), tags = character(0)) }\if{html}{\out{
}} \itemize{ \item \code{id} :: \code{character(1)}\cr diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd index 3590cece9..27951ca02 100644 --- a/man/PipeOpEnsemble.Rd +++ b/man/PipeOpEnsemble.Rd @@ -14,7 +14,7 @@ for a \code{PipeOp} and requires deriving classes to create the \code{private$we Note: This object is typically constructed via a derived class, e.g. \code{\link{PipeOpClassifAvg}} or \code{\link{PipeOpRegrAvg}}. -\if{html}{\out{
}}\preformatted{PipeOpEnsemble$new(innum = 0, collect_multiplicity = FALSE, id, param_set = ParamSet$new(), param_vals = list(), packages = character(0), prediction_type = "Prediction") +\if{html}{\out{
}}\preformatted{PipeOpEnsemble$new(innum = 0, collect_multiplicity = FALSE, id, param_set = ps(), param_vals = list(), packages = character(0), prediction_type = "Prediction") }\if{html}{\out{
}} \itemize{ \item \code{innum} :: \code{numeric(1)}\cr diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd index 4c64caf07..f604c1648 100644 --- a/man/PipeOpImpute.Rd +++ b/man/PipeOpImpute.Rd @@ -12,7 +12,7 @@ Abstract base class for feature imputation. \section{Construction}{ -\if{html}{\out{
}}\preformatted{PipeOpImpute$$new(id, param_set = ParamSet$new(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task") +\if{html}{\out{
}}\preformatted{PipeOpImpute$$new(id, param_set = ps(), param_vals = list(), whole_task_dependent = FALSE, packages = character(0), task_type = "Task") }\if{html}{\out{
}} \itemize{ \item \code{id} :: \code{character(1)}\cr diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd index 8d52573c7..f1f580ca3 100644 --- a/man/PipeOpTargetTrafo.Rd +++ b/man/PipeOpTargetTrafo.Rd @@ -19,7 +19,7 @@ Users can overload up to four \verb{private$}-functions: \code{.get_state()} (op \section{Construction}{ -\if{html}{\out{
}}\preformatted{PipeOpTargetTrafo$new(id, param_set = ParamSet$new(), param_vals = list() packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) +\if{html}{\out{
}}\preformatted{PipeOpTargetTrafo$new(id, param_set = ps(), param_vals = list() packages = character(0), task_type_in = "Task", task_type_out = task_type_in, tags = NULL) }\if{html}{\out{
}} \itemize{ \item \code{id} :: \code{character(1)}\cr diff --git a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd index 96ce14261..dd8a47237 100644 --- a/man/PipeOpTaskPreproc.Rd +++ b/man/PipeOpTaskPreproc.Rd @@ -40,7 +40,7 @@ the \code{\link{PipeOpTaskPreprocSimple}} class can be used instead. \section{Construction}{ -\if{html}{\out{
}}\preformatted{PipeOpTaskPreproc$new(id, param_set = ParamSet$new(), param_vals = list(), can_subset_cols = TRUE, +\if{html}{\out{
}}\preformatted{PipeOpTaskPreproc$new(id, param_set = ps(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task", tags = NULL, feature_types = mlr_reflections$task_feature_types) }\if{html}{\out{
}} \itemize{ diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd index e3e098259..620bfbbc9 100644 --- a/man/PipeOpTaskPreprocSimple.Rd +++ b/man/PipeOpTaskPreprocSimple.Rd @@ -26,7 +26,7 @@ This inherits from \code{\link{PipeOpTaskPreproc}} and behaves essentially the s \section{Construction}{ -\if{html}{\out{
}}\preformatted{PipeOpTaskPreprocSimple$new(id, param_set = ParamSet$new(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task") +\if{html}{\out{
}}\preformatted{PipeOpTaskPreprocSimple$new(id, param_set = ps(), param_vals = list(), can_subset_cols = TRUE, packages = character(0), task_type = "Task") }\if{html}{\out{
}} (Construction is identical to \code{\link{PipeOpTaskPreproc}}.) diff --git a/man/mlr_graphs_bagging.Rd b/man/mlr_graphs_bagging.Rd index 42828ef94..58d5e1e83 100644 --- a/man/mlr_graphs_bagging.Rd +++ b/man/mlr_graphs_bagging.Rd @@ -5,7 +5,13 @@ \alias{pipeline_bagging} \title{Create a bagging learner} \usage{ -pipeline_bagging(graph, iterations = 10, frac = 0.7, averager = NULL) +pipeline_bagging( + graph, + iterations = 10, + frac = 0.7, + averager = NULL, + replace = FALSE +) } \arguments{ \item{graph}{\code{\link{PipeOp}} | \code{\link{Graph}} \cr @@ -27,6 +33,10 @@ in order to perform simple averaging of classification and regression predictions respectively. If \code{NULL} (default), no averager is added to the end of the graph. Note that setting \code{collect_multipliciy = TRUE} during construction of the averager is required.} + +\item{replace}{\code{logical(1)} \cr +Whether to sample with replacement. +Default \code{FALSE}.} } \value{ \code{\link{Graph}} @@ -49,6 +59,11 @@ library(mlr3) lrn_po = po("learner", lrn("regr.rpart")) task = mlr_tasks$get("boston_housing") gr = pipeline_bagging(lrn_po, 3, averager = po("regravg", collect_multiplicity = TRUE)) -resample(task, GraphLearner$new(gr), rsmp("holdout")) +resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() + +# The original bagging method uses boosting by sampling with replacement. 
+gr = ppl("bagging", lrn_po, frac = 1, replace = TRUE, + averager = po("regravg", collect_multiplicity = TRUE)) +resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() } } diff --git a/man/mlr_pipeops_branch.Rd b/man/mlr_pipeops_branch.Rd index 23a434847..bfc8dcae0 100644 --- a/man/mlr_pipeops_branch.Rd +++ b/man/mlr_pipeops_branch.Rd @@ -24,9 +24,9 @@ Not to be confused with \code{\link{PipeOpCopy}}, the naming scheme is a bit unf \item \code{options} :: \code{numeric(1)} | \code{character}\cr If \code{options} is an integer number, it determines the number of output channels / options that are created, named \code{output1}...\verb{output}. The -\verb{$selection} parameter will then be a \code{\link{ParamInt}}. +\verb{$selection} parameter will then be an integer. If \code{options} is a \code{character}, it determines the names of channels directly. -The \verb{$selection} parameter will then be a \code{\link{ParamFct}}. +The \verb{$selection} parameter will then be factorial. \item \code{id} :: \code{character(1)}\cr Identifier of resulting object, default \code{"branch"}. \item \code{param_vals} :: named \code{list}\cr diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd index 5469b0319..e7499b9b8 100644 --- a/man/mlr_pipeops_updatetarget.Rd +++ b/man/mlr_pipeops_updatetarget.Rd @@ -27,7 +27,7 @@ name of the data of the input \code{\link[mlr3:Task]{Task}}, this column is set \section{Construction}{ -\if{html}{\out{
}}\preformatted{PipeOpUpdateTarget$new(id, param_set = ParamSet$new(), +\if{html}{\out{
}}\preformatted{PipeOpUpdateTarget$new(id, param_set = ps(), param_vals = list(), packages = character(0)) }\if{html}{\out{
}} \itemize{ diff --git a/man/ppl.Rd b/man/ppl.Rd index df4cefc78..713312f33 100644 --- a/man/ppl.Rd +++ b/man/ppl.Rd @@ -28,7 +28,7 @@ named \code{list} is returned, but unlike \code{\link[=pos]{pos()}} it will not \description{ Creates a \code{\link{Graph}} from \code{\link{mlr_graphs}} from given ID -\code{ppl()} taks a \code{charcter(1)} and returns a \code{\link{Graph}}. \code{ppls()} takes a \code{character} +\code{ppl()} taks a \code{character(1)} and returns a \code{\link{Graph}}. \code{ppls()} takes a \code{character} vector of any list and returns a \code{list} of possibly muliple \code{\link{Graph}}s. } \examples{ diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 9e556cf79..1f3a711ca 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -35,6 +35,9 @@ navbar: rss: icon: fa-rss href: https://mlr-org.com/ + extending: + text: Extending + href: extending.html reference: - title: Package diff --git a/tests/testthat/helper_functions.R b/tests/testthat/helper_functions.R index cec7417d9..5eb4006b8 100644 --- a/tests/testthat/helper_functions.R +++ b/tests/testthat/helper_functions.R @@ -118,15 +118,29 @@ expect_valid_pipeop_param_set = function(po, check_ps_default_values = TRUE) { ps = po$param_set expect_true(every(ps$tags, function(x) length(intersect(c("train", "predict"), x)) > 0L)) - uties = ps$params[ps$ids("ParamUty")] - if (length(uties)) { - test_value = NO_DEF # custom_checks should fail for NO_DEF - results = map(uties, function(uty) { - uty$custom_check(test_value) - }) - expect_true(all(map_lgl(results, function(result) { - length(result) == 1L && (is.character(result) || result == TRUE) # result == TRUE is necessary because default is function(x) TRUE - })), label = "custom_check returns string on failure") + if (mlr3pipelines:::paradox_info$is_old) { + uties = ps$params[ps$ids("ParamUty")] + if (length(uties)) { + test_value = NO_DEF # custom_checks should fail for NO_DEF + results = map(uties, function(uty) { + 
uty$custom_check(test_value) + }) + expect_true(all(map_lgl(results, function(result) { + length(result) == 1L && (is.character(result) || result == TRUE) # result == TRUE is necessary because default is function(x) TRUE + })), label = "custom_check returns string on failure") + } + } else { + uties = ps$ids("ParamUty") + if (length(uties)) { + test_value = NO_DEF # custom_checks should fail for NO_DEF + results = map(uties, function(uty) { + psn = ps$subset(uty, allow_dangling_dependencies = TRUE) + psn$check(structure(list(test_value), names = uty)) + }) + expect_true(all(map_lgl(results, function(result) { + length(result) == 1L && (is.character(result) || result == TRUE) # result == TRUE is necessary because default is function(x) TRUE + })), label = "custom_check returns string on failure") + } } if (check_ps_default_values) { @@ -294,7 +308,7 @@ expect_datapreproc_pipeop_class = function(poclass, constargs = list(), task, expect_task(po$predict(list(emptytask))[[1]]) expect_equal(emptytaskfnames, po$predict(list(emptytask))[[1]]$feature_names) - if ("affect_columns" %in% names(po$param_set$params) && affect_context_independent) { + if ("affect_columns" %in% po$param_set$ids() && affect_context_independent) { selector = function(data) data$feature_names[-1] po2$param_set$values$affect_columns = selector trained.subset = po$train(list(task2))[[1]] diff --git a/tests/testthat/helper_test_pipeops.R b/tests/testthat/helper_test_pipeops.R index 921d463fb..5265177bb 100644 --- a/tests/testthat/helper_test_pipeops.R +++ b/tests/testthat/helper_test_pipeops.R @@ -1,7 +1,7 @@ PipeOpDebugBasic = R6Class("PipeOpDebugBasic", inherit = PipeOp, public = list( - initialize = function(id = "debug.basic", param_set = ParamSet$new()) { + initialize = function(id = "debug.basic", param_set = ps()) { super$initialize(id = id, param_set = param_set, input = data.table(name = "input", train = "*", predict = "*"), output = data.table(name = "output", train = "*", predict = "*") @@ 
-15,7 +15,8 @@ PipeOpDebugBasic = R6Class("PipeOpDebugBasic", .predict = function(inputs) { catf("Predicting %s", self$id) self$state = c(self$state, inputs) - } + }, + .additional_phash_input = function() NULL ) ) @@ -41,10 +42,10 @@ PipeOpDebugMulti = R6Class("PipeOpDebugMulti", if (is.numeric(outputs)) { outputs = paste0("output_", seq_len(outputs)) } - p = ParamInt$new(id = "par", lower = 0, upper = 10, default = 0, tags = c("train", "predict")) + p = ps(par = p_int(lower = 0, upper = 10, default = 0, tags = c("train", "predict"))) self$nin = length(inputs) self$nout = length(outputs) - super$initialize(id, ParamSet$new(list(p)), + super$initialize(id, param_set = p, input = data.table(name = inputs, train = "*", predict = "*"), output = data.table(name = outputs, train = "*", predict = "*")) }), @@ -60,7 +61,8 @@ PipeOpDebugMulti = R6Class("PipeOpDebugMulti", self$id, deparse_list_safe(inputs), deparse_list_safe(self$state)) iin = inputs[[1]] as.list(iin + seq_len(self$nout)) - } + }, + .additional_phash_input = function() c(self$nin, self$nout) ) ) @@ -81,6 +83,7 @@ VarargPipeop = R6Class("VarargPipeop", .predict = function(inputs) { self$state = inputs list(inputs) - } + }, + .additional_phash_input = function() self$input$name ) ) diff --git a/tests/testthat/test_GraphLearner.R b/tests/testthat/test_GraphLearner.R index 8bacaa935..18da2a98a 100644 --- a/tests/testthat/test_GraphLearner.R +++ b/tests/testthat/test_GraphLearner.R @@ -123,7 +123,7 @@ test_that("graphlearner parameters behave as they should", { dbgr = PipeOpScale$new() %>>% PipeOpLearner$new(dblrn) - expect_subset(c("scale.center", "scale.scale", "classif.debug.x"), names(dbgr$param_set$params)) + expect_subset(c("scale.center", "scale.scale", "classif.debug.x"), dbgr$param_set$ids()) dbgr$param_set$values$classif.debug.x = 1 diff --git a/tests/testthat/test_dictionary.R b/tests/testthat/test_dictionary.R index e3ae20d97..1a598668c 100644 --- a/tests/testthat/test_dictionary.R +++ 
b/tests/testthat/test_dictionary.R @@ -117,31 +117,37 @@ test_that("Dictionary contains all PipeOps", { expect_equal(other_obj$phash, test_obj$phash, info = paste(dictname, "$new id test 2")) expect_equal(inflate(do.call(pogen$new, args)), test_obj, info = dictname) + + tops = test_obj$param_set # we now check if hyperparameters can be changed through construction # we do this by automatically generating a hyperparameter value that deviates from the automatically constructed one. # However, for ParamUty we can't do that, so if there are only 'ParamUty' parameter we skip this part. - eligibleparams = test_obj$param_set$params[test_obj$param_set$class != "ParamUty"] - eligibleparams = discard(eligibleparams, function(p) { - # filter out discrete params with only one level, or the numeric parameters with $lower == $upper - # The use '&&' here is intentional, because numeric parameters have 0 levels, and discrete parameters have $lower == $upper (== NA) - length(p$levels) < 2 && isTRUE(all.equal(p$lower, p$upper)) - }) + eligibleparams = which( + tops$class != "ParamUty" & + # filter out discrete params with only one level, or the numeric parameters with $lower == $upper + # Note that numeric parameters have 0 levels, and discrete parameters have $lower == $upper (== NA) + ( + (!is.na(tops$lower) & tops$lower != tops$upper) | + (is.finite(tops$nlevels) & tops$nlevels > 1) + ) + ) if (length(eligibleparams)) { - testingparam = eligibleparams[[1]] + testingparam = tops$ids()[[eligibleparams[[1]]]] # we want to construct an object where the parameter value is *different* from the value it gets on construction by default. 
# For this we take a few candidate values and `setdiff` the original value - origval = as.atomic(test_obj$param_set$values[[testingparam$id]]) - if (testingparam$class %in% c("ParamLgl", "ParamFct")) { - candidates = testingparam$levels + origval = as.atomic(test_obj$param_set$values[[testingparam]]) + if (tops$class[[testingparam]] %in% c("ParamLgl", "ParamFct")) { + candidates = tops$levels[[testingparam]] } else { - candidates = Filter(function(x) is.finite(x) && !is.na(x), c(testingparam$lower, testingparam$upper, testingparam$lower + 1, 0, origval + 1)) + candidates = Filter(function(x) is.finite(x) && !is.na(x), + c(tops$lower[[testingparam]], tops$upper[[testingparam]], tops$lower[[testingparam]] + 1, 0, origval + 1)) } val = setdiff(candidates, origval)[1] # construct the `param_vals = list(PARNAME = PARVAL)` construction argument args$param_vals = list(val) - names(args$param_vals) = testingparam$id + names(args$param_vals) = testingparam # check that the constructed object is different from the test_obj, but setting the test_obj's parameter # makes them equal again. @@ -152,7 +158,7 @@ test_that("Dictionary contains all PipeOps", { # phash should be independent of this! 
expect_true(isTRUE(all.equal(dict_constructed$phash, test_obj$phash)), dictname) - test_obj$param_set$values[[testingparam$id]] = val + test_obj$param_set$values[[testingparam]] = val expect_equal(touch(dict_constructed), test_obj) expect_equal(inflate(touch(gen_constructed)), test_obj) diff --git a/tests/testthat/test_mlr_graphs_bagging.R b/tests/testthat/test_mlr_graphs_bagging.R index a5dc1067c..15a70fb0c 100644 --- a/tests/testthat/test_mlr_graphs_bagging.R +++ b/tests/testthat/test_mlr_graphs_bagging.R @@ -39,3 +39,36 @@ test_that("Bagging Pipeline", { expect_true(all(map_lgl(predict_out, function(x) "PredictionClassif" %in% class(x)))) }) +test_that("Bagging with replacement", { + tsk = tsk("iris") + lrn = lrn("classif.rpart") + p = ppl("bagging", graph = po(lrn), replace = TRUE, averager = po("classifavg", collect_multiplicity = TRUE)) + expect_graph(p) + res = resample(tsk, GraphLearner$new(p), rsmp("holdout")) + expect_resample_result(res) + + tsk$filter(1:140) + expect_equal(anyDuplicated(tsk$data()), 0) # make sure no duplicates + + p = ppl("bagging", iterations = 2, frac = 1, + graph = lrn("classif.debug", save_tasks = TRUE), + replace = TRUE, averager = po("classifavg", collect_multiplicity = TRUE) + ) + p$train(tsk) + + expect_true(anyDuplicated(p$pipeops$classif.debug$state[[1]]$model$task_train$data()) != 0) + + getOrigId = function(data) { + tsk$data()[, origline := .I][data, on = colnames(tsk$data()), origline] + } + orig_id_1 = getOrigId(p$pipeops$classif.debug$state[[1]]$model$task_train$data()) + orig_id_2 = getOrigId(p$pipeops$classif.debug$state[[2]]$model$task_train$data()) + + expect_equal(length(orig_id_1), 140) + expect_equal(length(orig_id_2), 140) + # if we sampled the same values twice, the all.equal() would just give TRUE + expect_string(all.equal(orig_id_1, orig_id_2)) + + expect_true(length(unique(orig_id_1)) < 140) + expect_true(length(unique(orig_id_2)) < 140) +}) diff --git a/tests/testthat/test_mlr_graphs_stacking.R 
b/tests/testthat/test_mlr_graphs_stacking.R index 79b5f04b0..8cd0d6735 100644 --- a/tests/testthat/test_mlr_graphs_stacking.R +++ b/tests/testthat/test_mlr_graphs_stacking.R @@ -10,7 +10,7 @@ test_that("Stacking Pipeline", { # default graph_stack = pipeline_stacking(base_learners, super_learner) expect_graph(graph_stack) - expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion", "super.rpart")) + expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion_stacking", "super.rpart")) graph_learner = as_learner(graph_stack) graph_learner$train(tsk("iris")) expect_class(graph_learner$model$super.rpart$model, "rpart") @@ -19,7 +19,7 @@ test_that("Stacking Pipeline", { # no nop graph_stack = pipeline_stacking(base_learners, super_learner, use_features = FALSE) expect_graph(graph_stack) - expect_names(graph_stack$ids(), identical.to = c("base.rpart", "featureunion", "super.rpart")) + expect_names(graph_stack$ids(), identical.to = c("base.rpart", "featureunion_stacking", "super.rpart")) graph_learner = as_learner(graph_stack) graph_learner$train(tsk("iris")) expect_class(graph_learner$model$super.rpart$model, "rpart") @@ -28,7 +28,7 @@ test_that("Stacking Pipeline", { # folds graph_stack = pipeline_stacking(base_learners, super_learner, folds = 5) expect_graph(graph_stack) - expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion", "super.rpart")) + expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion_stacking", "super.rpart")) graph_learner = as_learner(graph_stack) graph_learner$train(tsk("iris")) expect_equal(graph_learner$graph$pipeops$base.rpart$param_set$values$resampling.folds, 5) @@ -38,7 +38,7 @@ test_that("Stacking Pipeline", { # insample graph_stack = pipeline_stacking(base_learners, super_learner, method = "insample") expect_graph(graph_stack) - expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion", "super.rpart")) + 
expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion_stacking", "super.rpart")) graph_learner = as_learner(graph_stack) graph_learner$train(tsk("iris")) expect_equal(graph_learner$graph$pipeops$base.rpart$param_set$values$resampling.method, "insample") diff --git a/tests/testthat/test_multiplicities.R b/tests/testthat/test_multiplicities.R index e68c04bc2..a938ce1ca 100644 --- a/tests/testthat/test_multiplicities.R +++ b/tests/testthat/test_multiplicities.R @@ -64,9 +64,7 @@ test_that("PipeOp - evaluate_multiplicities", { public = list( initialize = function(num, id = "multiplicities", param_vals = list()) { assert_int(num, lower = 1L) - ps = ParamSet$new(params = list( - ParamUty$new("state", tags = "train") - )) + ps = ps(state = p_uty(tags = "train")) super$initialize(id, param_set = ps, param_vals = param_vals, input = data.table(name = rep_suffix("input", num), train = "*", predict = "*"), output = data.table(name = rep_suffix("output", num), train = "*", predict = "*"), diff --git a/tests/testthat/test_parvals.R b/tests/testthat/test_parvals.R index 6e6ed6f45..c4c3fa115 100644 --- a/tests/testthat/test_parvals.R +++ b/tests/testthat/test_parvals.R @@ -25,7 +25,7 @@ test_that("graph param vals", { expect_equal(gr$pipeops$pca$param_set$values$center, TRUE) expect_equal(gr$param_set$values$pca.center, TRUE) - expect_set_equal(names(gr$param_set$params), + expect_set_equal(gr$param_set$ids(), c("scale.center", "scale.scale" ,"scale.robust", "scale.affect_columns", "pca.center", "pca.scale.", "pca.rank.", "pca.affect_columns")) expect_error({ diff --git a/tests/testthat/test_pipeop_featureunion.R b/tests/testthat/test_pipeop_featureunion.R index 9687d1afb..f408fb196 100644 --- a/tests/testthat/test_pipeop_featureunion.R +++ b/tests/testthat/test_pipeop_featureunion.R @@ -257,3 +257,18 @@ test_that("featureunion - cbind_tasks - duplicates", { expect_equal(output$data(cols = "x"), inputs[[1L]]$data(cols = "x")) 
expect_equivalent(output$data(cols = c("Species", new_iris_names)), task1$data()) }) + +test_that("featureunion - does not drop 'x' column", { + task1 = as_task_regr(data.table( + z = 1:10, + y = 1:10 + ), target = "y") + + task2 = as_task_regr(data.table( + x = 1:10, + y = 1:10 + ), target = "y") + + taskout = po("featureunion")$train(list(task1, task2))[[1L]] + expect_permutation(taskout$feature_names, c("x", "z")) +}) diff --git a/tests/testthat/test_pipeop_filter.R b/tests/testthat/test_pipeop_filter.R index fda806a33..a02239fc5 100644 --- a/tests/testthat/test_pipeop_filter.R +++ b/tests/testthat/test_pipeop_filter.R @@ -50,7 +50,7 @@ test_that("PipeOpFilter parameters", { po = PipeOpFilter$new(mlr3filters::FilterVariance$new()) expect_set_equal(c("filter.nfeat", "filter.frac", "filter.cutoff", "filter.permuted"), - grep("^filter\\.", names(po$param_set$params), value = TRUE)) + grep("^filter\\.", po$param_set$ids(), value = TRUE)) po = po$clone(deep = TRUE) # cloning often breaks param connection diff --git a/tests/testthat/test_pipeop_impute.R b/tests/testthat/test_pipeop_impute.R index 0793bf321..9e66ea8fe 100644 --- a/tests/testthat/test_pipeop_impute.R +++ b/tests/testthat/test_pipeop_impute.R @@ -9,12 +9,12 @@ test_that("PipeOpImpute", { PipeOpTestImpute = R6Class("PipeOpTestImpute", inherit = PipeOpTaskPreprocSimple, public = list( initialize = function(id = "impute", param_vals = list()) { - ps = ParamSet$new(list( - ParamFct$new("method_num", levels = c("median", "mean", "mode", "sample", "hist", "oor", "constant"), tags = c("train", "predict")), - ParamFct$new("method_fct", levels = c("oor", "sample", "mode", "constant"), tags = c("train", "predict")), - ParamFct$new("add_dummy", levels = c("none", "missing_train", "all"), tags = c("train", "predict")), - ParamUty$new("innum", tags = c("train", "predict")) - )) + ps = ps( + method_num = p_fct(c("median", "mean", "mode", "sample", "hist", "oor", "constant"), tags = c("train", "predict")), + method_fct 
= p_fct(c("oor", "sample", "mode", "constant"), tags = c("train", "predict")), + add_dummy = p_fct(c("none", "missing_train", "all"), tags = c("train", "predict")), + innum = p_uty(tags = c("train", "predict")) + ) ps$values = list(method_num = "median", method_fct = "oor", add_dummy = "missing_train") super$initialize(id, ps, param_vals = param_vals) }, diff --git a/tests/testthat/test_pipeop_learner.R b/tests/testthat/test_pipeop_learner.R index 164d4a2aa..223a5292d 100644 --- a/tests/testthat/test_pipeop_learner.R +++ b/tests/testthat/test_pipeop_learner.R @@ -33,9 +33,17 @@ test_that("PipeOpLearner - param_set and values", { }) po$param_set$values$minsplit = 2L expect_equal(po$param_set$values, po$learner$param_set$values) - expect_equal(po$param_set$values, list(xval = 0L, minsplit = 2L)) + + sortnames = function(x) { + if (!is.null(names(x))) { + x <- x[order(names(x), decreasing = TRUE)] + } + x + } + + expect_equal(sortnames(po$param_set$values), list(xval = 0L, minsplit = 2L)) po$param_set$values$maxdepth = 1L - expect_equal(po$param_set$values, list(xval = 0L, minsplit = 2L, maxdepth = 1L)) + expect_equal(sortnames(po$param_set$values), list(xval = 0L, minsplit = 2L, maxdepth = 1L)) po$param_set$values = list(minsplit = 1L) expect_equal(po$param_set$values, list(minsplit = 1L)) expect_error({ diff --git a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R index f80087ac5..bd369987a 100644 --- a/tests/testthat/test_pipeop_learnercv.R +++ b/tests/testthat/test_pipeop_learnercv.R @@ -41,7 +41,7 @@ test_that("PipeOpLearnerCV - basic properties", { test_that("PipeOpLearnerCV - param values", { lrn = mlr_learners$get("classif.rpart") polrn = PipeOpLearnerCV$new(lrn) - expect_subset(c("minsplit", "resampling.method", "resampling.folds"), names(polrn$param_set$params)) + expect_subset(c("minsplit", "resampling.method", "resampling.folds"), polrn$param_set$ids()) expect_equal(polrn$param_set$values, list(resampling.method = "cv", 
resampling.folds = 3, resampling.keep_response = FALSE, xval = 0)) polrn$param_set$values$minsplit = 2 expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, minsplit = 2, xval = 0)) diff --git a/tests/testthat/test_po.R b/tests/testthat/test_po.R index fb6fc0559..3438330d3 100644 --- a/tests/testthat/test_po.R +++ b/tests/testthat/test_po.R @@ -55,7 +55,7 @@ test_that("mlr_pipeops access works", { dblrn = R6Class("debuglearn", inherit = LearnerClassif, public = list( initialize = function() { - super$initialize(id = "debuglearn", param_set = paradox::ParamSet$new()$add(paradox::ParamDbl$new("key"))) + super$initialize(id = "debuglearn", param_set = ps(key = p_dbl())) } ) ) @@ -159,7 +159,7 @@ test_that("mlr_pipeops multi-access works", { dblrn = R6Class("debuglearn", inherit = LearnerClassif, public = list( initialize = function() { - super$initialize(id = "debuglearn", param_set = paradox::ParamSet$new()$add(paradox::ParamDbl$new("key"))) + super$initialize(id = "debuglearn", param_set = ps(key = p_dbl())) } ) ) diff --git a/tests/testthat/test_ppl.R b/tests/testthat/test_ppl.R index 948cbbf79..9c88f868b 100644 --- a/tests/testthat/test_ppl.R +++ b/tests/testthat/test_ppl.R @@ -57,3 +57,18 @@ test_that("mlr_pipeops multi-access works", { expect_equal(ppls(), mlr_graphs) }) + +test_that("mlr3book authors don't sleepwalk through life", { + + tasks = tsks(c("breast_cancer", "sonar")) + + glrn_stack = as_learner(ppl("robustify") %>>% ppl("stacking", + lrns(c("classif.rpart", "classif.debug")), + lrn("classif.rpart", id = "classif.rpart2") + )) + glrn_stack$id = "Stack" + + learners = c(glrn_stack) + bmr = benchmark(benchmark_grid(tasks, learners, rsmp("cv", folds = 2))) + +})