Skip to content

Commit

Permalink
Merge pull request #737 from mlr-org/feat/keep_results
Browse files Browse the repository at this point in the history
Minor requirements for mlr3torch
  • Loading branch information
mb706 authored Mar 26, 2024
2 parents 2fea863 + 0f9c27f commit 82e4022
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 13 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ Config/testthat/edition: 3
Config/testthat/parallel: true
NeedsCompilation: no
Roxygen: list(markdown = TRUE, r6 = FALSE)
RoxygenNote: 7.2.3
RoxygenNote: 7.2.3.9000
VignetteBuilder: knitr
Collate:
'Graph.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ import(mlr3)
import(mlr3misc)
import(paradox)
importFrom(R6,R6Class)
importFrom(data.table,as.data.table)
importFrom(digest,digest)
importFrom(stats,setNames)
importFrom(utils,bibentry)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# mlr3pipelines 0.5.0-9000

* Feature: The `$add_pipeop()` method got an argument `clone` (old behaviour `TRUE` by default)
* Bugfix: `PipeOpFeatureUnion` in some rare cases dropped variables called `"x"`
* Compatibility with upcoming paradox release

# mlr3pipelines 0.5.0-2
Expand Down
13 changes: 7 additions & 6 deletions R/Graph.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
#' * `phash` :: `character(1)` \cr
#' Stores a checksum calculated on the [`Graph`] configuration, which includes all [`PipeOp`] hashes
#' *except* their `$param_set$values`, and a hash of `$edges`.
#' * `keep_results` :: `logical(1)` \cr
#' * `keep_results` :: `logical(1)`\cr
#' Whether to store intermediate results in the [`PipeOp`]'s `$.result` slot, mostly for debugging purposes. Default `FALSE`.
#' * `man` :: `character(1)`\cr
#' Identifying string of the help page that shows with `help()`.
Expand All @@ -69,13 +69,14 @@
#' (`logical(1)`) -> `character` \cr
#' Get IDs of all [`PipeOp`]s. This is in order that [`PipeOp`]s were added if
#' `sorted` is `FALSE`, and topologically sorted if `sorted` is `TRUE`.
#' * `add_pipeop(op)` \cr
#' ([`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr
#' * `add_pipeop(op, clone = TRUE)` \cr
#' ([`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`, `logical(1)`) -> `self` \cr
#' Mutates [`Graph`] by adding a [`PipeOp`] to the [`Graph`]. This does not add any edges, so the new [`PipeOp`]
#' will not be connected within the [`Graph`] at first.\cr
#' Instead of supplying a [`PipeOp`] directly, an object that can naturally be converted to a [`PipeOp`] can also
#' be supplied, e.g. a [`Learner`][mlr3::Learner] or a [`Filter`][mlr3filters::Filter]; see [`as_pipeop()`].
#' The argument given as `op` is always cloned; to access a `Graph`'s [`PipeOp`]s by-reference, use `$pipeops`.\cr
#' The argument given as `op` is cloned if `clone` is `TRUE` (default); to access a `Graph`'s [`PipeOp`]s
#' by-reference, use `$pipeops`.\cr
#' Note that `$add_pipeop()` is a relatively low-level operation, it is recommended to build graphs using [`%>>%`].
#' * `add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)` \cr
#' (`character(1)`, `character(1)`,
Expand Down Expand Up @@ -181,8 +182,8 @@ Graph = R6Class("Graph",
topo_sort(tmp)$id
},

add_pipeop = function(op) {
op = as_pipeop(op, clone = TRUE)
add_pipeop = function(op, clone = TRUE) {
op = as_pipeop(op, clone = assert_flag(clone))
if (op$id %in% names(self$pipeops)) {
stopf("PipeOp with id '%s' already in Graph", op$id)
}
Expand Down
1 change: 1 addition & 0 deletions R/PipeOpFeatureUnion.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ cbind_tasks = function(inputs, assert_targets_equal, inprefix) {
# again done by reference
new_features = unlist(c(list(data.table(x = vector(length = task$nrow))),
map(tail(inputs, -1L), .f = function(y) y$data(ids, cols = y$feature_names))), recursive = FALSE)
names(new_features)[1] = make.unique(rev(names(new_features)))[[length(new_features)]]

# we explicitly have to subset to the unique column names, otherwise task$cbind() complains for data.table backends
new_features = new_features[unique(names(new_features))]
Expand Down
6 changes: 4 additions & 2 deletions R/assert_graph.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ as_graph = function(x, clone = FALSE) {
}

#' @export
as_graph.default = function(x, clone = FALSE) {
Graph$new()$add_pipeop(x) # add_pipeop always clones and checks automatically for convertability
as_graph.default = function(x, clone = TRUE) {
# different default than other methods for backwards compatibility
# previously $add_pipeop() always cloned its input
Graph$new()$add_pipeop(x, clone = clone)
}

#' @export
Expand Down
9 changes: 5 additions & 4 deletions man/Graph.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions tests/testthat/test_pipeop_featureunion.R
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,18 @@ test_that("featureunion - cbind_tasks - duplicates", {
expect_equal(output$data(cols = "x"), inputs[[1L]]$data(cols = "x"))
expect_equivalent(output$data(cols = c("Species", new_iris_names)), task1$data())
})

test_that("featureunion - does not drop 'x' column", {
task1 = as_task_regr(data.table(
z = 1:10,
y = 1:10
), target = "y")

task2 = as_task_regr(data.table(
x = 1:10,
y = 1:10
), target = "y")

taskout = po("featureunion")$train(list(task1, task2))[[1L]]
expect_permutation(taskout$feature_names, c("x", "z"))
})

0 comments on commit 82e4022

Please sign in to comment.