From 0804b4464c5f10a807c709c64094faf5374929b7 Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 26 Mar 2024 17:38:21 +0100 Subject: [PATCH 1/3] rename featureunion in ppls --- NEWS.md | 1 + R/pipeline_robustify.R | 2 +- R/pipeline_stacking.R | 2 +- tests/testthat/test_ppl.R | 16 ++++++++++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index dba77a500..aac4a0302 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # mlr3pipelines 0.5.0-9000 +* Changed the ID of `PipeOpFeatureUnion` used in `ppl("robustify")` and `ppl("stacking")` * Feature: The `$add_pipeop()` method got an argument `clone` (old behaviour `TRUE` by default) * Bugfix: `PipeOpFeatureUnion` in some rare cases dropped variables called `"x"` * Compatibility with upcoming paradox release diff --git a/R/pipeline_robustify.R b/R/pipeline_robustify.R index 1d9774627..abb386c4b 100644 --- a/R/pipeline_robustify.R +++ b/R/pipeline_robustify.R @@ -170,7 +170,7 @@ pipeline_robustify = function(task = NULL, learner = NULL, imputing, po("missind", affect_columns = selector_type(c("numeric", "integer", "logical")), type = if (missind_numeric) "numeric" else "factor") )), - if (has_numbers || has_logicals) po("featureunion"), + if (has_numbers || has_logicals) po("featureunion", id = "featureunion_robustify"), if (has_factorials) po("imputeoor") ) diff --git a/R/pipeline_stacking.R b/R/pipeline_stacking.R index cb1512126..4ac1f0a0d 100644 --- a/R/pipeline_stacking.R +++ b/R/pipeline_stacking.R @@ -53,7 +53,7 @@ pipeline_stacking = function(base_learners, super_learner, method = "cv", folds if (use_features) base_learners_cv = c(base_learners_cv, po("nop")) gunion(base_learners_cv, in_place = TRUE) %>>!% - po("featureunion") %>>!% + po("featureunion", id = "featureunion_stacking") %>>!% super_learner } diff --git a/tests/testthat/test_ppl.R b/tests/testthat/test_ppl.R index 948cbbf79..625fab0ce 100644 --- a/tests/testthat/test_ppl.R +++ b/tests/testthat/test_ppl.R @@ -57,3 +57,19 @@ test_that("mlr_pipeops multi-access works", { expect_equal(ppls(), mlr_graphs) }) + +test_that("mlr3book authors don't sleepwalk through life", { + + tasks = tsks(c("breast_cancer", "sonar")) + + + glrn_stack = as_learner(ppl("robustify") %>>% ppl("stacking", + lrns(c("classif.rpart", "classif.debug")), + lrn("classif.log_reg") + )) + glrn_stack$id = "Stack" + + learners = c(glrn_stack) + bmr = benchmark(benchmark_grid(tasks, learners, rsmp("cv", folds = 3))) + +}) From 0fefbbc64b962f8afd528de038ba9f455af1865a Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 26 Mar 2024 17:41:08 +0100 Subject: [PATCH 2/3] use different learners --- tests/testthat/test_ppl.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_ppl.R b/tests/testthat/test_ppl.R index 625fab0ce..9c88f868b 100644 --- a/tests/testthat/test_ppl.R +++ b/tests/testthat/test_ppl.R @@ -62,14 +62,13 @@ test_that("mlr3book authors don't sleepwalk through life", { tasks = tsks(c("breast_cancer", "sonar")) - glrn_stack = as_learner(ppl("robustify") %>>% ppl("stacking", lrns(c("classif.rpart", "classif.debug")), - lrn("classif.log_reg") + lrn("classif.rpart", id = "classif.rpart2") )) glrn_stack$id = "Stack" learners = c(glrn_stack) - bmr = benchmark(benchmark_grid(tasks, learners, rsmp("cv", folds = 3))) + bmr = benchmark(benchmark_grid(tasks, learners, rsmp("cv", folds = 2))) }) From 47e3ee5a0af9e544230208c7dcc7d1ae04ee6caa Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 26 Mar 2024 17:57:54 +0100 Subject: [PATCH 3/3] adapt tests --- tests/testthat/test_mlr_graphs_stacking.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_mlr_graphs_stacking.R b/tests/testthat/test_mlr_graphs_stacking.R index 79b5f04b0..8cd0d6735 100644 --- a/tests/testthat/test_mlr_graphs_stacking.R +++ b/tests/testthat/test_mlr_graphs_stacking.R @@ -10,7 +10,7 @@ test_that("Stacking Pipeline", { # default graph_stack = pipeline_stacking(base_learners, super_learner) expect_graph(graph_stack) - expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion", "super.rpart")) + expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion_stacking", "super.rpart")) graph_learner = as_learner(graph_stack) graph_learner$train(tsk("iris")) expect_class(graph_learner$model$super.rpart$model, "rpart") @@ -19,7 +19,7 @@ test_that("Stacking Pipeline", { # no nop graph_stack = pipeline_stacking(base_learners, super_learner, use_features = FALSE) expect_graph(graph_stack) - expect_names(graph_stack$ids(), identical.to = c("base.rpart", "featureunion", "super.rpart")) + expect_names(graph_stack$ids(), identical.to = c("base.rpart", "featureunion_stacking", "super.rpart")) graph_learner = as_learner(graph_stack) graph_learner$train(tsk("iris")) expect_class(graph_learner$model$super.rpart$model, "rpart") @@ -28,7 +28,7 @@ test_that("Stacking Pipeline", { # folds graph_stack = pipeline_stacking(base_learners, super_learner, folds = 5) expect_graph(graph_stack) - expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion", "super.rpart")) + expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion_stacking", "super.rpart")) graph_learner = as_learner(graph_stack) graph_learner$train(tsk("iris")) expect_equal(graph_learner$graph$pipeops$base.rpart$param_set$values$resampling.folds, 5) @@ -38,7 +38,7 @@ test_that("Stacking Pipeline", { # insample graph_stack = pipeline_stacking(base_learners, super_learner, method = "insample") expect_graph(graph_stack) - expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion", "super.rpart")) + expect_names(graph_stack$ids(), identical.to = c("base.rpart", "nop", "featureunion_stacking", "super.rpart")) graph_learner = as_learner(graph_stack) graph_learner$train(tsk("iris")) expect_equal(graph_learner$graph$pipeops$base.rpart$param_set$values$resampling.method, "insample")