Merge branch 'master' into bundle
sebffischer authored Apr 9, 2024
2 parents 030b2b3 + a4933e1 commit 6d618ac
Showing 93 changed files with 628 additions and 428 deletions.
6 changes: 6 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,6 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
45 changes: 45 additions & 0 deletions .github/workflows/dev-cmd-check.yml
@@ -0,0 +1,45 @@
# dev cmd check workflow of the mlr3 ecosystem v0.1.0
# https://github.com/mlr-org/actions
on:
workflow_dispatch:
push:
branches:
- master
pull_request:
branches:
- master

name: dev-check

jobs:
check-package:
runs-on: ${{ matrix.config.os }}

name: ${{ matrix.config.dev-package }}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

strategy:
fail-fast: false
matrix:
config:
- {os: ubuntu-latest, r: 'release', dev-package: "mlr-org/bbotk', 'mlr-org/mlr3learners', 'mlr-org/paradox"}

steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- name: Install dev versions
run: pak::pkg_install(c('${{ matrix.config.dev-package }}'))
shell: Rscript {0}

- uses: r-lib/actions/check-r-package@v2
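
The new `dev-cmd-check.yml` workflow checks the package against the development versions of its upstream mlr3 packages (bbotk, mlr3learners, paradox). A rough local equivalent — an illustration only, not part of this commit, assuming `pak` and `rcmdcheck` are installed and the working directory is the package root — would be:

```r
# Install the GitHub development versions of the upstream packages,
# then run R CMD check on the current package directory.
# (Sketch only; mirrors what the workflow above does on CI.)
pak::pkg_install(c("mlr-org/bbotk", "mlr-org/mlr3learners", "mlr-org/paradox"))
rcmdcheck::rcmdcheck(".", args = "--no-manual", error_on = "warning")
```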
4 changes: 2 additions & 2 deletions .github/workflows/pkgdown.yml
@@ -23,7 +23,7 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

@@ -44,7 +44,7 @@

- name: Deploy
if: github.event_name != 'pull_request'
uses: JamesIves/github-pages-deploy-action@v4.4.1
uses: JamesIves/github-pages-deploy-action@v4.5.0
with:
clean: false
branch: gh-pages
2 changes: 1 addition & 1 deletion .github/workflows/r-cmd-check.yml
@@ -28,7 +28,7 @@ jobs:
- {os: ubuntu-latest, r: 'release'}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: mlr3pipelines
Title: Preprocessing Operators and Pipelines for 'mlr3'
Version: 0.5.0-9000
Version: 0.5.1-9000
Authors@R:
c(person(given = "Martin",
family = "Binder",
9 changes: 8 additions & 1 deletion NEWS.md
@@ -1,7 +1,14 @@
# mlr3pipelines 0.5.0-9000
# mlr3pipelines 0.5.1-9000

* Added marshaling support to `GraphLearner`

# mlr3pipelines 0.5.1

* Changed the ID of `PipeOpFeatureUnion` used in `ppl("robustify")` and `ppl("stacking")`.
* `pipeline_bagging()` gains a `replace` argument (defaults to `FALSE`, which reproduces the old behaviour).
* Feature: The `$add_pipeop()` method gains a `clone` argument (defaults to `TRUE`, which reproduces the old behaviour).
* Bugfix: `PipeOpFeatureUnion` no longer drops variables named `"x"` in some rare cases.
* Compatibility with upcoming paradox release.

# mlr3pipelines 0.5.0-2

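
For illustration, a minimal sketch of the new `replace` argument of `pipeline_bagging()` mentioned in the 0.5.1 notes above (the learner and settings are arbitrary examples, not taken from this commit):

```r
library(mlr3)
library(mlr3pipelines)

# Bag a decision tree; replace = TRUE samples with replacement,
# replace = FALSE reproduces the previous behaviour.
gr = ppl("bagging",
  graph      = po("learner", lrn("classif.rpart")),
  iterations = 5,
  replace    = TRUE,
  averager   = po("classifavg", collect_multiplicity = TRUE)
)
glrn = as_learner(gr)
```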
13 changes: 7 additions & 6 deletions R/Graph.R
@@ -59,7 +59,7 @@
#' * `phash` :: `character(1)` \cr
#' Stores a checksum calculated on the [`Graph`] configuration, which includes all [`PipeOp`] hashes
#' *except* their `$param_set$values`, and a hash of `$edges`.
#' * `keep_results` :: `logical(1)` \cr
#' * `keep_results` :: `logical(1)`\cr
#' Whether to store intermediate results in the [`PipeOp`]'s `$.result` slot, mostly for debugging purposes. Default `FALSE`.
#' * `man` :: `character(1)`\cr
#' Identifying string of the help page that shows with `help()`.
@@ -69,13 +69,14 @@
#' (`logical(1)`) -> `character` \cr
#' Get IDs of all [`PipeOp`]s. This is in order that [`PipeOp`]s were added if
#' `sorted` is `FALSE`, and topologically sorted if `sorted` is `TRUE`.
#' * `add_pipeop(op)` \cr
#' ([`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr
#' * `add_pipeop(op, clone = TRUE)` \cr
#' ([`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`, `logical(1)`) -> `self` \cr
#' Mutates [`Graph`] by adding a [`PipeOp`] to the [`Graph`]. This does not add any edges, so the new [`PipeOp`]
#' will not be connected within the [`Graph`] at first.\cr
#' Instead of supplying a [`PipeOp`] directly, an object that can naturally be converted to a [`PipeOp`] can also
#' be supplied, e.g. a [`Learner`][mlr3::Learner] or a [`Filter`][mlr3filters::Filter]; see [`as_pipeop()`].
#' The argument given as `op` is always cloned; to access a `Graph`'s [`PipeOp`]s by-reference, use `$pipeops`.\cr
#' The argument given as `op` is cloned if `clone` is `TRUE` (default); to access a `Graph`'s [`PipeOp`]s
#' by-reference, use `$pipeops`.\cr
#' Note that `$add_pipeop()` is a relatively low-level operation, it is recommended to build graphs using [`%>>%`].
#' * `add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)` \cr
#' (`character(1)`, `character(1)`,
@@ -181,8 +182,8 @@ Graph = R6Class("Graph",
topo_sort(tmp)$id
},

add_pipeop = function(op) {
op = as_pipeop(op, clone = TRUE)
add_pipeop = function(op, clone = TRUE) {
op = as_pipeop(op, clone = assert_flag(clone))
if (op$id %in% names(self$pipeops)) {
stopf("PipeOp with id '%s' already in Graph", op$id)
}
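
A short sketch of the new `clone` argument of `$add_pipeop()` (illustration only; the `scale` and `pca` PipeOps are arbitrary examples):

```r
library(mlr3pipelines)

gr = Graph$new()
op = po("scale")
gr$add_pipeop(op, clone = FALSE)   # new: add the PipeOp by reference
identical(gr$pipeops$scale, op)    # TRUE - the Graph holds the very same object
gr$add_pipeop(po("pca"))           # default clone = TRUE keeps the old behaviour
```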
29 changes: 14 additions & 15 deletions R/LearnerAvg.R
@@ -58,12 +58,12 @@
LearnerClassifAvg = R6Class("LearnerClassifAvg", inherit = LearnerClassif,
public = list(
initialize = function(id = "classif.avg") {
ps = ParamSet$new(params = list(
ParamUty$new("measure", custom_check = check_class_or_character("MeasureClassif", mlr_measures), tags = "train"),
ParamUty$new("optimizer", custom_check = check_optimizer, tags = "train"),
ParamUty$new("log_level", tags = "train",
ps = ps(
measure = p_uty(custom_check = check_class_or_character("MeasureClassif", mlr_measures), tags = "train"),
optimizer = p_uty(custom_check = check_optimizer, tags = "train"),
log_level = p_uty(tags = "train",
function(x) check_string(x) %check||% check_integerish(x))
))
)
ps$values = list(measure = "classif.ce", optimizer = "nloptr", log_level = "warn")
super$initialize(
id = id,
@@ -132,12 +132,12 @@ LearnerClassifAvg = R6Class("LearnerClassifAvg", inherit = LearnerClassif,
LearnerRegrAvg = R6Class("LearnerRegrAvg", inherit = LearnerRegr,
public = list(
initialize = function(id = "regr.avg") {
ps = ParamSet$new(params = list(
ParamUty$new("measure", custom_check = check_class_or_character("MeasureRegr", mlr_measures), tags = "train"),
ParamUty$new("optimizer", custom_check = check_optimizer, tags = "train"),
ParamUty$new("log_level", tags = "train",
ps = ps(
measure = p_uty(custom_check = check_class_or_character("MeasureRegr", mlr_measures), tags = "train"),
optimizer = p_uty(custom_check = check_optimizer, tags = "train"),
log_level = p_uty(tags = "train",
function(x) check_string(x) %check||% check_integerish(x))
))
)
ps$values = list(measure = "regr.mse", optimizer = "nloptr", log_level = "warn")
super$initialize(
id = id,
@@ -185,10 +185,9 @@ optimize_weights_learneravg = function(self, task, n_weights, data) {
}

pars = self$param_set$get_values(tags = "train")
ps = ParamSet$new(params = imap(data, function(x, n) {
if (is.numeric(n)) n = paste0("w.", n)
ParamDbl$new(id = n, lower = 0, upper = 1)
}))
pl = rep(list(p_dbl(0, 1)), length(data))
names(pl) = names(data) %??% paste0("w.", seq_along(data))
ps = do.call(ps, pl)
optimizer = pars$optimizer
if (inherits(optimizer, "character")) {
optimizer = bbotk::opt(optimizer)
@@ -198,7 +197,7 @@ }
}
measure = pars$measure
if (is.character(measure)) measure = msr(measure)
codomain = ParamSet$new(list(ParamDbl$new(id = measure$id, tags = ifelse(measure$minimize, "minimize", "maximize"))))
codomain = do.call(paradox::ps, structure(list(p_dbl(tags = ifelse(measure$minimize, "minimize", "maximize"))), names = measure$id))
objfun = bbotk::ObjectiveRFun$new(
fun = function(xs) learneravg_objfun(xs, task = task, measure = measure, avg_weight_fun = self$weighted_average_prediction, data = data),
domain = ps, codomain = codomain
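
Most of the remaining changes in this commit follow the same pattern: parameter sets are built with the `ps()`/`p_*()` shortcuts instead of `ParamSet$new()` with `Param*$new()` objects, for compatibility with the upcoming paradox release. A condensed sketch of the pattern (the parameter names and values are illustrative):

```r
library(paradox)

# Old style (pre-1.0 paradox), as removed in this commit:
# param_set = ParamSet$new(params = list(
#   ParamDbl$new("eps", lower = 0, default = 0.001, tags = "train")
# ))

# New style, as introduced in this commit:
param_set = ps(
  eps = p_dbl(lower = 0, default = 0.001, tags = "train")
)

# Dynamically named parameters, as in optimize_weights_learneravg() above:
pl = rep(list(p_dbl(0, 1)), 3)
names(pl) = paste0("w.", 1:3)
param_set_dyn = do.call(ps, pl)
```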
8 changes: 4 additions & 4 deletions R/PipeOp.R
@@ -38,7 +38,7 @@
#'
#' @section Construction:
#' ```
#' PipeOp$new(id, param_set = ParamSet$new(), param_vals = list(), input, output, packages = character(0), tags = character(0))
#' PipeOp$new(id, param_set = ps(), param_vals = list(), input, output, packages = character(0), tags = character(0))
#' ```
#'
#' * `id` :: `character(1)`\cr
@@ -236,7 +236,7 @@ PipeOp = R6Class("PipeOp",
.result = NULL,
tags = NULL,

initialize = function(id, param_set = ParamSet$new(), param_vals = list(), input, output, packages = character(0), tags = "abstract") {
initialize = function(id, param_set = ps(), param_vals = list(), input, output, packages = character(0), tags = "abstract") {
if (inherits(param_set, "ParamSet")) {
private$.param_set = assert_param_set(param_set)
private$.param_set_source = NULL
@@ -338,7 +338,7 @@ PipeOp = R6Class("PipeOp",
id = function(val) {
if (!missing(val)) {
private$.id = val
if (!is.null(private$.param_set)) {
if (paradox_info$is_old && !is.null(private$.param_set)) {
# private$.param_set may be NULL if it is constructed dynamically by active binding
private$.param_set$set_id = val
}
@@ -353,7 +353,7 @@
} else {
private$.param_set = sourcelist[[1]]
}
if (!is.null(self$id)) {
if (paradox_info$is_old && !is.null(self$id)) {
private$.param_set$set_id = self$id
}
}
12 changes: 6 additions & 6 deletions R/PipeOpBoxCox.R
@@ -65,12 +65,12 @@ PipeOpBoxCox = R6Class("PipeOpBoxCox",
inherit = PipeOpTaskPreproc,
public = list(
initialize = function(id = "boxcox", param_vals = list()) {
ps = ParamSet$new(params = list(
ParamLgl$new("standardize", default = TRUE, tags = c("train", "boxcox")),
ParamDbl$new("eps", default = 0.001, lower = 0, tags = c("train", "boxcox")),
ParamDbl$new("lower", tags = c("train", "boxcox")),
ParamDbl$new("upper", tags = c("train", "boxcox"))
))
ps = ps(
standardize = p_lgl(default = TRUE, tags = c("train", "boxcox")),
eps = p_dbl(default = 0.001, lower = 0, tags = c("train", "boxcox")),
lower = p_dbl(tags = c("train", "boxcox")),
upper = p_dbl(tags = c("train", "boxcox"))
)
super$initialize(id, param_set = ps, param_vals = param_vals,
packages = "bestNormalize", feature_types = c("numeric", "integer"))
}
10 changes: 5 additions & 5 deletions R/PipeOpBranch.R
@@ -19,9 +19,9 @@
#' * `options` :: `numeric(1)` | `character`\cr
#' If `options` is an integer number, it determines the number of
#' output channels / options that are created, named `output1`...`output<n>`. The
#' `$selection` parameter will then be a [`ParamInt`].
#' `$selection` parameter will then be an integer.
#' If `options` is a `character`, it determines the names of channels directly.
#' The `$selection` parameter will then be a [`ParamFct`].
#' The `$selection` parameter will then be categorical, with the channel names as levels.
#' * `id` :: `character(1)`\cr
#' Identifier of resulting object, default `"branch"`.
#' * `param_vals` :: named `list`\cr
@@ -90,14 +90,14 @@ PipeOpBranch = R6Class("PipeOpBranch",
)
if (is.numeric(options)) {
options = round(options)
param = ParamInt$new("selection", lower = 1L, upper = options, tags = c("train", "predict", "required"))
param = p_int(lower = 1L, upper = options, tags = c("train", "predict", "required"))
options = rep_suffix("output", options)
initval = 1
} else {
param = ParamFct$new("selection", levels = options, tags = c("train", "predict", "required"))
param = p_fct(options, tags = c("train", "predict", "required"))
initval = options[1]
}
ps = ParamSet$new(params = list(param))
ps = ps(selection = param)
ps$values$selection = initval
super$initialize(id, ps, param_vals,
input = data.table(name = "input", train = "*", predict = "*"),
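
To illustrate the two forms of `options` described in the documentation above (a sketch; the operator names are arbitrary):

```r
library(mlr3pipelines)

b_int = po("branch", options = 3)                  # selection is an integer in 1..3
b_int$param_set$values$selection = 2

b_fct = po("branch", options = c("pca", "scale"))  # selection is one of the given names
b_fct$param_set$values$selection = "pca"
```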
6 changes: 3 additions & 3 deletions R/PipeOpChunk.R
@@ -64,9 +64,9 @@ PipeOpChunk = R6Class("PipeOpChunk",
public = list(
initialize = function(outnum, id = "chunk", param_vals = list()) {
outnum = assert_int(outnum, lower = 1L)
ps = ParamSet$new(params = list(
ParamLgl$new("shuffle", tags = "train")
))
ps = ps(
shuffle = p_lgl(tags = "train")
)
ps$values = list(shuffle = TRUE)
super$initialize(id,
param_set = ps, param_vals = param_vals,
14 changes: 6 additions & 8 deletions R/PipeOpClassBalancing.R
@@ -104,14 +104,12 @@ PipeOpClassBalancing = R6Class("PipeOpClassBalancing",

public = list(
initialize = function(id = "classbalancing", param_vals = list()) {
ps = ParamSet$new(params = list(
ParamDbl$new("ratio", lower = 0, upper = Inf, tags = "train"),
ParamFct$new("reference",
levels = c("all", "major", "minor", "nonmajor", "nonminor", "one"), tags = "train"),
ParamFct$new("adjust",
levels = c("all", "major", "minor", "nonmajor", "nonminor", "upsample", "downsample"), tags = "train"),
ParamLgl$new("shuffle", tags = "train")
))
ps = ps(
ratio = p_dbl(lower = 0, upper = Inf, tags = "train"),
reference = p_fct(c("all", "major", "minor", "nonmajor", "nonminor", "one"), tags = "train"),
adjust = p_fct(c("all", "major", "minor", "nonmajor", "nonminor", "upsample", "downsample"), tags = "train"),
shuffle = p_lgl(tags = "train")
)
ps$values = list(ratio = 1, reference = "all", adjust = "all", shuffle = TRUE)
super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data")
}
6 changes: 3 additions & 3 deletions R/PipeOpClassWeights.R
@@ -71,9 +71,9 @@ PipeOpClassWeights = R6Class("PipeOpClassWeights",

public = list(
initialize = function(id = "classweights", param_vals = list()) {
ps = ParamSet$new(params = list(
ParamDbl$new("minor_weight", lower = 0, upper = Inf, tags = "train")
))
ps = ps(
minor_weight = p_dbl(lower = 0, upper = Inf, tags = "train")
)
ps$values = list(minor_weight = 1)
super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data")
}
6 changes: 3 additions & 3 deletions R/PipeOpColApply.R
@@ -92,9 +92,9 @@ PipeOpColApply = R6Class("PipeOpColApply",
inherit = PipeOpTaskPreprocSimple,
public = list(
initialize = function(id = "colapply", param_vals = list()) {
ps = ParamSet$new(params = list(
ParamUty$new("applicator", custom_check = check_function, tags = c("train", "predict"))
))
ps = ps(
applicator = p_uty(custom_check = check_function, tags = c("train", "predict"))
)
ps$values = list(applicator = identity)
super$initialize(id, ps, param_vals = param_vals)
}
6 changes: 3 additions & 3 deletions R/PipeOpColRoles.R
@@ -56,9 +56,9 @@ PipeOpColRoles = R6Class("PipeOpColRoles",
inherit = PipeOpTaskPreprocSimple,
public = list(
initialize = function(id = "colroles", param_vals = list()) {
ps = ParamSet$new(params = list(
ps = ps(
# named list, each entry with a vector of roles
ParamUty$new("new_role", tags = c("train", "predict"), custom_check = function(x) {
new_role = p_uty(tags = c("train", "predict"), custom_check = function(x) {
first_check = check_list(x, types = "character", any.missing = FALSE, min.len = 1L, names = "named")
# return the error directly if this failed
if (is.character(first_check)) {
@@ -69,7 +69,7 @@ PipeOpColRoles = R6Class("PipeOpColRoles",
all_col_roles = unique(unlist(mlr3::mlr_reflections$task_col_roles))
check_subset(unlist(x), all_col_roles[all_col_roles != "target"])
})
))
)
super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE)
}
),
8 changes: 4 additions & 4 deletions R/PipeOpCollapseFactors.R
@@ -59,10 +59,10 @@ PipeOpCollapseFactors = R6Class("PipeOpCollapseFactors",
inherit = PipeOpTaskPreprocSimple,
public = list(
initialize = function(id = "collapsefactors", param_vals = list()) {
ps = ParamSet$new(params = list(
ParamDbl$new("no_collapse_above_prevalence", 0, 1, tags = c("train", "predict")),
ParamInt$new("target_level_count", 2, tags = c("train", "predict"))
))
ps = ps(
no_collapse_above_prevalence = p_dbl(0, 1, tags = c("train", "predict")),
target_level_count = p_int(2, tags = c("train", "predict"))
)
ps$values = list(no_collapse_above_prevalence = 1, target_level_count = 2)
super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("factor", "ordered"))
}