diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..5ace4600a --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/dev-cmd-check.yml b/.github/workflows/dev-cmd-check.yml index b61ffa4bb..77e4a431e 100644 --- a/.github/workflows/dev-cmd-check.yml +++ b/.github/workflows/dev-cmd-check.yml @@ -27,7 +27,7 @@ jobs: - {os: ubuntu-latest, r: 'release', dev-package: "mlr-org/bbotk', 'mlr-org/mlr3learners', 'mlr-org/paradox"} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml index 09f42a5a8..85cf2ff72 100644 --- a/.github/workflows/pkgdown.yml +++ b/.github/workflows/pkgdown.yml @@ -23,7 +23,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -44,7 +44,7 @@ jobs: - name: Deploy if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.4.1 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: clean: false branch: gh-pages diff --git a/.github/workflows/r-cmd-check.yml b/.github/workflows/r-cmd-check.yml index fbd78ab3c..8d13d0e56 100644 --- a/.github/workflows/r-cmd-check.yml +++ b/.github/workflows/r-cmd-check.yml @@ -28,7 +28,7 @@ jobs: - {os: ubuntu-latest, r: 'release'} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: diff --git a/DESCRIPTION b/DESCRIPTION index 5e6b8a1ef..10fb81ecd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -93,7 +93,7 @@ Config/testthat/edition: 3 Config/testthat/parallel: true NeedsCompilation: no Roxygen: list(markdown = TRUE, r6 = FALSE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.2.3.9000 VignetteBuilder: knitr Collate: 'Graph.R' 
diff --git a/NAMESPACE b/NAMESPACE index a40fd7b44..c0b0975d9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -148,6 +148,7 @@ import(mlr3) import(mlr3misc) import(paradox) importFrom(R6,R6Class) +importFrom(data.table,as.data.table) importFrom(digest,digest) importFrom(stats,setNames) importFrom(utils,bibentry) diff --git a/NEWS.md b/NEWS.md index a4ecaa7a6..867ed08a4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,9 @@ # mlr3pipelines 0.5.0-9000 -* `pipeline_bagging()` gets the `replace` argument. -* Compatibility with upcoming paradox release +* `pipeline_bagging()` gets the `replace` argument; it defaults to `FALSE`, which preserves the old behaviour. +* Feature: The `$add_pipeop()` method gets the `clone` argument; it defaults to `TRUE`, which preserves the old behaviour. +* Bugfix: `PipeOpFeatureUnion` in some rare cases dropped variables called `"x"`. +* Compatibility with upcoming paradox release. # mlr3pipelines 0.5.0-2 diff --git a/R/Graph.R b/R/Graph.R index 8cc95a0ae..7df319808 100644 --- a/R/Graph.R +++ b/R/Graph.R @@ -59,7 +59,7 @@ #' * `phash` :: `character(1)` \cr #' Stores a checksum calculated on the [`Graph`] configuration, which includes all [`PipeOp`] hashes #' *except* their `$param_set$values`, and a hash of `$edges`. -#' * `keep_results` :: `logical(1)` \cr +#' * `keep_results` :: `logical(1)`\cr #' Whether to store intermediate results in the [`PipeOp`]'s `$.result` slot, mostly for debugging purposes. Default `FALSE`. #' * `man` :: `character(1)`\cr #' Identifying string of the help page that shows with `help()`. @@ -69,13 +69,14 @@ #' (`logical(1)`) -> `character` \cr #' Get IDs of all [`PipeOp`]s. This is in order that [`PipeOp`]s were added if #' `sorted` is `FALSE`, and topologically sorted if `sorted` is `TRUE`.
-#' * `add_pipeop(op)` \cr -#' ([`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`) -> `self` \cr +#' * `add_pipeop(op, clone = TRUE)` \cr +#' ([`PipeOp`] | [`Learner`][mlr3::Learner] | [`Filter`][mlr3filters::Filter] | `...`, `logical(1)`) -> `self` \cr #' Mutates [`Graph`] by adding a [`PipeOp`] to the [`Graph`]. This does not add any edges, so the new [`PipeOp`] #' will not be connected within the [`Graph`] at first.\cr #' Instead of supplying a [`PipeOp`] directly, an object that can naturally be converted to a [`PipeOp`] can also #' be supplied, e.g. a [`Learner`][mlr3::Learner] or a [`Filter`][mlr3filters::Filter]; see [`as_pipeop()`]. -#' The argument given as `op` is always cloned; to access a `Graph`'s [`PipeOp`]s by-reference, use `$pipeops`.\cr +#' The argument given as `op` is cloned if `clone` is `TRUE` (default); to access a `Graph`'s [`PipeOp`]s +#' by-reference, use `$pipeops`.\cr #' Note that `$add_pipeop()` is a relatively low-level operation, it is recommended to build graphs using [`%>>%`]. 
#' * `add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)` \cr #' (`character(1)`, `character(1)`, @@ -181,8 +182,8 @@ Graph = R6Class("Graph", topo_sort(tmp)$id }, - add_pipeop = function(op) { - op = as_pipeop(op, clone = TRUE) + add_pipeop = function(op, clone = TRUE) { + op = as_pipeop(op, clone = assert_flag(clone)) if (op$id %in% names(self$pipeops)) { stopf("PipeOp with id '%s' already in Graph", op$id) } diff --git a/R/PipeOpFeatureUnion.R b/R/PipeOpFeatureUnion.R index 8a2c1b4ce..bbc1e890b 100644 --- a/R/PipeOpFeatureUnion.R +++ b/R/PipeOpFeatureUnion.R @@ -202,6 +202,7 @@ cbind_tasks = function(inputs, assert_targets_equal, inprefix) { # again done by reference new_features = unlist(c(list(data.table(x = vector(length = task$nrow))), map(tail(inputs, -1L), .f = function(y) y$data(ids, cols = y$feature_names))), recursive = FALSE) + names(new_features)[1] = make.unique(rev(names(new_features)))[[length(new_features)]] # we explicitly have to subset to the unique column names, otherwise task$cbind() complains for data.table backends new_features = new_features[unique(names(new_features))] diff --git a/R/assert_graph.R b/R/assert_graph.R index 03c723e14..fadaa22b7 100644 --- a/R/assert_graph.R +++ b/R/assert_graph.R @@ -39,8 +39,10 @@ as_graph = function(x, clone = FALSE) { } #' @export -as_graph.default = function(x, clone = FALSE) { - Graph$new()$add_pipeop(x) # add_pipeop always clones and checks automatically for convertability +as_graph.default = function(x, clone = TRUE) { + # different default than other methods for backwards compatibility + # previously $add_pipeop() always cloned its input + Graph$new()$add_pipeop(x, clone = clone) } #' @export diff --git a/man/Graph.Rd b/man/Graph.Rd index 8da82dac3..ab76b20c8 100644 --- a/man/Graph.Rd +++ b/man/Graph.Rd @@ -69,7 +69,7 @@ Stores a checksum calculated on the \code{\link{Graph}} configuration, which inc \item \code{phash} :: \code{character(1)} \cr Stores a checksum calculated on the 
\code{\link{Graph}} configuration, which includes all \code{\link{PipeOp}} hashes \emph{except} their \verb{$param_set$values}, and a hash of \verb{$edges}. -\item \code{keep_results} :: \code{logical(1)} \cr +\item \code{keep_results} :: \code{logical(1)}\cr Whether to store intermediate results in the \code{\link{PipeOp}}'s \verb{$.result} slot, mostly for debugging purposes. Default \code{FALSE}. \item \code{man} :: \code{character(1)}\cr Identifying string of the help page that shows with \code{help()}. @@ -83,13 +83,14 @@ Identifying string of the help page that shows with \code{help()}. (\code{logical(1)}) -> \code{character} \cr Get IDs of all \code{\link{PipeOp}}s. This is in order that \code{\link{PipeOp}}s were added if \code{sorted} is \code{FALSE}, and topologically sorted if \code{sorted} is \code{TRUE}. -\item \code{add_pipeop(op)} \cr -(\code{\link{PipeOp}} | \code{\link[mlr3:Learner]{Learner}} | \code{\link[mlr3filters:Filter]{Filter}} | \code{...}) -> \code{self} \cr +\item \code{add_pipeop(op, clone = TRUE)} \cr +(\code{\link{PipeOp}} | \code{\link[mlr3:Learner]{Learner}} | \code{\link[mlr3filters:Filter]{Filter}} | \code{...}, \code{logical(1)}) -> \code{self} \cr Mutates \code{\link{Graph}} by adding a \code{\link{PipeOp}} to the \code{\link{Graph}}. This does not add any edges, so the new \code{\link{PipeOp}} will not be connected within the \code{\link{Graph}} at first.\cr Instead of supplying a \code{\link{PipeOp}} directly, an object that can naturally be converted to a \code{\link{PipeOp}} can also be supplied, e.g. a \code{\link[mlr3:Learner]{Learner}} or a \code{\link[mlr3filters:Filter]{Filter}}; see \code{\link[=as_pipeop]{as_pipeop()}}. 
-The argument given as \code{op} is always cloned; to access a \code{Graph}'s \code{\link{PipeOp}}s by-reference, use \verb{$pipeops}.\cr +The argument given as \code{op} is cloned if \code{clone} is \code{TRUE} (default); to access a \code{Graph}'s \code{\link{PipeOp}}s +by-reference, use \verb{$pipeops}.\cr Note that \verb{$add_pipeop()} is a relatively low-level operation, it is recommended to build graphs using \code{\link{\%>>\%}}. \item \code{add_edge(src_id, dst_id, src_channel = NULL, dst_channel = NULL)} \cr (\code{character(1)}, \code{character(1)}, diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 9e556cf79..1f3a711ca 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -35,6 +35,9 @@ navbar: rss: icon: fa-rss href: https://mlr-org.com/ + extending: + text: Extending + href: extending.html reference: - title: Package diff --git a/tests/testthat/test_pipeop_featureunion.R b/tests/testthat/test_pipeop_featureunion.R index 9687d1afb..f408fb196 100644 --- a/tests/testthat/test_pipeop_featureunion.R +++ b/tests/testthat/test_pipeop_featureunion.R @@ -257,3 +257,18 @@ test_that("featureunion - cbind_tasks - duplicates", { expect_equal(output$data(cols = "x"), inputs[[1L]]$data(cols = "x")) expect_equivalent(output$data(cols = c("Species", new_iris_names)), task1$data()) }) + +test_that("featureunion - does not drop 'x' column", { + task1 = as_task_regr(data.table( + z = 1:10, + y = 1:10 + ), target = "y") + + task2 = as_task_regr(data.table( + x = 1:10, + y = 1:10 + ), target = "y") + + taskout = po("featureunion")$train(list(task1, task2))[[1L]] + expect_permutation(taskout$feature_names, c("x", "z")) +})