Merge pull request #748 from mlr-org/emergency_version_update

version 0.5.0-2
mlr-org · Jan 13, 2024 · 044762e · 044762e
2 parents a85f352 + 8dadaf6
commit 044762e
Show file tree

Hide file tree

Showing 32 changed files with 62 additions and 42 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,11 @@
 # mlr3pipelines 0.5.0-9000
 
+
+# mlr3pipelines 0.5.0-2
+
+* Avoid unnecessarily large serializations of `ppl("robustify")` pipelines.
+* Made tests and examples compatible with mlr3 update.
+
 # mlr3pipelines 0.5.0-1
 
 * Bugfix: `PipeOpTuneThreshold` was not overloading the correct `.train` and `.predict` functions.

diff --git a/R/PipeOpColRoles.R b/R/PipeOpColRoles.R
@@ -44,7 +44,7 @@
 #'
 #' task = tsk("boston_housing")
 #' pop = po("colroles", param_vals = list(
-#'   new_role = list(cmedv = "order")
+#'   new_role = list(town = c("order", "feature"))
 #' ))
 #'
 #' pop$train(list(task))

diff --git a/R/PipeOpFilter.R b/R/PipeOpFilter.R
@@ -82,6 +82,7 @@
 #' @examples
 #' library("mlr3")
 #' library("mlr3filters")
+#' \dontshow{data.table::setDTthreads(1)}
 #'
 #' # setup PipeOpFilter to keep the 5 most important
 #' # features of the spam task w.r.t. their AUC

diff --git a/R/PipeOpMissingIndicators.R b/R/PipeOpMissingIndicators.R
@@ -55,6 +55,7 @@
 #' @export
 #' @examples
 #' library("mlr3")
+#' \dontshow{data.table::setDTthreads(1)}
 #'
 #' task = tsk("pima")$select(c("insulin", "triceps"))
 #' sum(complete.cases(task$data()))

diff --git a/R/PipeOpTuneThreshold.R b/R/PipeOpTuneThreshold.R
@@ -116,6 +116,7 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold",
     .optimize_objfun = function(pred) {
       optimizer = self$param_set$values$optimizer
       if (inherits(optimizer, "character")) optimizer = bbotk::opt(optimizer)
+      if (inherits(optimizer, "OptimizerGenSA")) optimizer$param_set$values$trace.mat = TRUE  # https://github.com/mlr-org/bbotk/issues/214
       ps = private$.make_param_set(pred)
       measure = self$param_set$values$measure
       if (is.character(measure)) measure = msr(measure) else measure

diff --git a/README.Rmd b/README.Rmd
@@ -68,9 +68,10 @@ Single computational steps can be represented as so-called **PipeOps**, which ca
 
 ## Documentation
 
-The easiest way to get started is reading some of the vignettes that are shipped with the package, which can also be viewed online:
+A good way to get into `mlr3pipelines` is to read the following two chapters of the mlr3 book:
 
-* [Quick Introduction](https://mlr3book.mlr-org.com/pipelines.html), with short examples to get started
+* [Sequential Pipelines](https://mlr3book.mlr-org.com/chapters/chapter7/sequential_pipelines.html)
+* [Non-Sequential Pipelines and Tuning](https://mlr3book.mlr-org.com/chapters/chapter8/non-sequential_pipelines_and_tuning.html)
 
 ## Bugs, Questions, Feedback
 

diff --git a/README.md b/README.md
@@ -81,11 +81,10 @@ are:
 
 ## Documentation
 
-The easiest way to get started is reading some of the vignettes that are
-shipped with the package, which can also be viewed online:
+A good way to get into `mlr3pipelines` is to read the following two chapters of the mlr3 book:
 
-  - [Quick Introduction](https://mlr3book.mlr-org.com/pipelines.html),
-    with short examples to get started
+  - [Sequential Pipelines](https://mlr3book.mlr-org.com/chapters/chapter7/sequential_pipelines.html)
+  - [Non-Sequential Pipelines and Tuning](https://mlr3book.mlr-org.com/chapters/chapter8/non-sequential_pipelines_and_tuning.html)
 
 ## Bugs, Questions, Feedback
 

diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd
diff --git a/man/mlr_pipeops_filter.Rd b/man/mlr_pipeops_filter.Rd
diff --git a/man/mlr_pipeops_missind.Rd b/man/mlr_pipeops_missind.Rd
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
@@ -14,3 +14,12 @@ x$task_types = data.table::setkeyv(rbind(x$task_types, x$task_types["regr", mult
 
 x$task_types = data.table::setkeyv(rbind(x$task_types, x$task_types["classif", mult = "first"][, `:=`(package = "DUMMY", task = "DUMMY")]), "type")
 
+mlr3::mlr_tasks$add("boston_housing_classic", function(id = "boston_housing_classic") {
+  b = mlr3::as_data_backend(mlr3misc::load_dataset("BostonHousing2", "mlbench"))
+  task = mlr3::TaskRegr$new(id, b, target = "medv", label = "Boston Housing Prices (target leakage, for mlr3pipelines tests only)")
+  b$hash = "mlr3pipelines::mlr_tasks_boston_housing_classic"
+  task
+})
+
+
+data.table::setDTthreads(threads = 1)
diff --git a/tests/testthat/test_GraphLearner.R b/tests/testthat/test_GraphLearner.R
@@ -450,14 +450,14 @@ test_that("predict() function for Graph", {
 
   lx = as_graph(lrn("regr.rpart"))
 
-  lx$train(tsk("boston_housing"))
+  lx$train(tsk("boston_housing_classic"))
 
-  p1 = lx$pipeops$regr.rpart$learner_model$predict(tsk("boston_housing"))
+  p1 = lx$pipeops$regr.rpart$learner_model$predict(tsk("boston_housing_classic"))
 
-  expect_equal(predict(lx, tsk("boston_housing")), p1)
+  expect_equal(predict(lx, tsk("boston_housing_classic")), p1)
 
   expect_equal(
-    predict(lx, tsk("boston_housing")$data(cols = tsk("boston_housing")$feature_names)),
+    predict(lx, tsk("boston_housing_classic")$data(cols = tsk("boston_housing_classic")$feature_names)),
     p1$response
   )
 

diff --git a/tests/testthat/test_learner_weightedaverage.R b/tests/testthat/test_learner_weightedaverage.R
@@ -84,7 +84,7 @@ test_that("LearnerRegrAvg", {
   expect_true(all(is.na(prd$se)))
 
   intask = (pipeline_greplicate(PipeOpLearnerCV$new(lrn("regr.featureless", predict_type = "response")), 3) %>>%
-    PipeOpFeatureUnion$new())$train(tsk("boston_housing"))[[1]]
+    PipeOpFeatureUnion$new())$train(tsk("boston_housing_classic"))[[1]]
 
   # Works for accuracy
   lrn = LearnerRegrAvg$new()
@@ -147,7 +147,7 @@ test_that("LearnerClassifAvg Pipeline", {
 
 test_that("LearnerRegrAvg Pipeline", {
   skip_on_cran()  # takes too long
-  tsk = mlr_tasks$get("boston_housing")
+  tsk = mlr_tasks$get("boston_housing_classic")
   # Works for response
   # TODO: this is a bit of a deep problem: https://github.com/mlr-org/mlr3pipelines/issues/216
   ## lrn = LearnerRegrAvg$new()

diff --git a/tests/testthat/test_mlr_graphs_bagging.R b/tests/testthat/test_mlr_graphs_bagging.R
@@ -15,7 +15,7 @@ test_that("Bagging Pipeline", {
   expect_graph(p)
 
   # regr
-  tsk = tsk("boston_housing")
+  tsk = tsk("boston_housing_classic")
   lrn = lrn("regr.rpart")
   p = ppl("bagging", graph = po(lrn), iterations = 5L, averager = po("regravg", collect_multiplicity = TRUE))
   expect_graph(p)

diff --git a/tests/testthat/test_mlr_graphs_branching.R b/tests/testthat/test_mlr_graphs_branching.R
@@ -2,7 +2,7 @@ context("ppl - pipeline_branch")
 
 test_that("Branching Pipeline", {
   lrns = map(list(lrn("classif.rpart"), lrn("classif.featureless")), po)
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
   gr = pipeline_branch(lrns)
 
   expect_graph(gr)

diff --git a/tests/testthat/test_mlr_graphs_robustify.R b/tests/testthat/test_mlr_graphs_robustify.R
@@ -44,7 +44,7 @@ test_that("Robustify Pipeline", {
   expect_true(all(c("imputehist", "missind") %in% names(p$pipeops)))
 
   # test on mixed, no missings
-  tsk = tsk("boston_housing")
+  tsk = tsk("boston_housing_classic")
   lrn = lrn("regr.rpart")
   p = ppl("robustify", task = tsk, learner = lrn) %>>% po(lrn)
   expect_graph(p)

diff --git a/tests/testthat/test_mlr_graphs_targettrafo.R b/tests/testthat/test_mlr_graphs_targettrafo.R
@@ -1,7 +1,7 @@
 context("ppl - pipeline_targettrafo")
 
 test_that("Target Trafo Pipeline", {
-  task = tsk("boston_housing")
+  task = tsk("boston_housing_classic")
 
   tt = ppl("targettrafo", graph = PipeOpLearner$new(LearnerRegrRpart$new()))
   tt$param_set$values$targetmutate.trafo = function(x) log(x, base = 2)

diff --git a/tests/testthat/test_pipeop_encode.R b/tests/testthat/test_pipeop_encode.R
@@ -1,12 +1,12 @@
 context("PipeOpEncode")
 
 test_that("PipeOpEncode", {
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
 
   td = task$data()
   td$town[1] = NA
   td$chas[2] = NA
-  natask = TaskRegr$new("boston_housing_na", td, task$target_names)
+  natask = TaskRegr$new("boston_housing_classic_na", td, task$target_names)
 
   check_dat = function(dat) {
     expect_true(all(is.na(dat[1, grep("^town\\.", colnames(dat), value = TRUE), with = FALSE])))

diff --git a/tests/testthat/test_pipeop_encodeimpact.R b/tests/testthat/test_pipeop_encodeimpact.R
@@ -1,7 +1,7 @@
 context("PipeOpEncodeImpact")
 
 test_that("PipeOpEncodeImpact", {
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
 
   chaslevels = task$levels()$chas
   townlevels = task$levels()$town
@@ -131,7 +131,7 @@ test_that("PipeOpImpactEncode on Regression", {
 
   selector = as_graph(po("select", selector = selector_type("numeric")))
 
-  expect_equal(unname((selector %>>% op)$train(tsk("boston_housing"))), unname(selector$train(tsk("boston_housing"))))
+  expect_equal(unname((selector %>>% op)$train(tsk("boston_housing_classic"))), unname(selector$train(tsk("boston_housing_classic"))))
 
 
   op$param_set$values$smoothing = 1e-10

diff --git a/tests/testthat/test_pipeop_encodelmer.R b/tests/testthat/test_pipeop_encodelmer.R
@@ -6,7 +6,7 @@ sample_n_letters = function(n, l = 3) {
 
 test_that("PipeOpEncodeLmer regr", {
   set.seed(8008135)
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
   chaslevels = task$levels()$chas
   townlevels = task$levels()$town
 

diff --git a/tests/testthat/test_pipeop_filter.R b/tests/testthat/test_pipeop_filter.R
@@ -1,7 +1,7 @@
 context("PipeOpFilter")
 
 test_that("PipeOpFilter", {
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
 
   expect_datapreproc_pipeop_class(PipeOpFilter,
     list(filter = mlr3filters::FilterVariance$new(), param_vals = list(filter.frac = 0.5)), task = task,

diff --git a/tests/testthat/test_pipeop_fixfactors.R b/tests/testthat/test_pipeop_fixfactors.R
@@ -1,7 +1,7 @@
 context("PipeOpFixFactors")
 
 test_that("PipeOpFixFactors", {
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
 
   chaslevels = task$levels()$chas
   townlevels = task$levels()$town

diff --git a/tests/testthat/test_pipeop_removeconstants.R b/tests/testthat/test_pipeop_removeconstants.R
@@ -1,7 +1,7 @@
 context("PipeOpRemoveConstants")
 
 test_that("PipeOpRemoveConstants - basic properties", {
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
   task$cbind(data.table(xx = rep(1, 506), yy = rep("a", 506)))
 
   op = PipeOpRemoveConstants$new()
@@ -13,7 +13,7 @@ test_that("PipeOpRemoveConstants - basic properties", {
 
 test_that("PipeOpRemoveConstants removes expected cols", {
 
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
   task$cbind(data.table(xx = rep(1, 506), yy = rep("a", 506),
     xx1 = c(2, rep(1, 505)), yy1 = c("b", rep("a", 505))))
 

diff --git a/tests/testthat/test_pipeop_scale.R b/tests/testthat/test_pipeop_scale.R
@@ -17,7 +17,7 @@ test_that("PipeOpScale - basic properties", {
 test_that("basic properties", {
   expect_datapreproc_pipeop_class(PipeOpScale, task = mlr_tasks$get("iris"))
 
-  expect_datapreproc_pipeop_class(PipeOpScale, task = mlr_tasks$get("boston_housing"))
+  expect_datapreproc_pipeop_class(PipeOpScale, task = mlr_tasks$get("boston_housing_classic"))
 
   expect_datapreproc_pipeop_class(PipeOpScale, task = mlr_tasks$get("pima"))
 

diff --git a/tests/testthat/test_pipeop_select.R b/tests/testthat/test_pipeop_select.R
@@ -26,6 +26,6 @@ test_that("select", {
   expect_set_equal(po$train(list(tsk("iris")))[[1]]$feature_names, c("Sepal.Length", "Sepal.Width", "Petal.Width"))
 
   po$param_set$values$selector = selector_type("factor")
-  expect_set_equal(po$train(list(tsk("boston_housing")))[[1]]$feature_names, c("chas", "town"))
+  expect_set_equal(po$train(list(tsk("boston_housing_classic")))[[1]]$feature_names, c("chas", "town"))
 
 })
diff --git a/tests/testthat/test_pipeop_subsample.R b/tests/testthat/test_pipeop_subsample.R
@@ -33,7 +33,7 @@ test_that("PipeOpSubsample works unstratified", {
   po = PipeOpSubsample$new()
   tnew = train_pipeop(po, list(task))
 
-  task = mlr_tasks$get("boston_housing")$filter(1L)  # actually has to be an int m(
+  task = mlr_tasks$get("boston_housing_classic")$filter(1L)  # actually has to be an int m(
   po = PipeOpSubsample$new()
   po$param_set$values = list(stratify = TRUE, frac = 0.6)
   expect_error(train_pipeop(po, list(task)))

diff --git a/tests/testthat/test_pipeop_targetmutate.R b/tests/testthat/test_pipeop_targetmutate.R
@@ -17,7 +17,7 @@ test_that("PipeOpTargetMutate - basic properties", {
 
   expect_graph(g)
 
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
   task_copy = task$clone(deep = TRUE)
   address_in = address(task)
   train_out = g$train(task)
@@ -51,13 +51,13 @@ test_that("PipeOpTargetMutate - log base 2 trafo", {
  g$add_edge(src_id = "logtrafo", dst_id = "regr.rpart", src_channel = 2L, dst_channel = 1L)
  g$add_edge(src_id = "regr.rpart", dst_id = "targetinvert", src_channel = 1L, dst_channel = 2L)
 
- task = mlr_tasks$get("boston_housing")
+ task = mlr_tasks$get("boston_housing_classic")
  train_out = g$train(task)
  predict_out = g$predict(task)
 
  dat = task$data()
  dat$medv = log(dat$medv, base = 2)
- task_log = TaskRegr$new("boston_housing_log", backend = dat, target = "medv")
+ task_log = TaskRegr$new("boston_housing_classic_log", backend = dat, target = "medv")
 
  learner = LearnerRegrRpart$new()
  learner$train(task_log)
@@ -81,7 +81,7 @@ test_that("PipeOpTargetMutate - log base 2 trafo", {
 #' g$add_edge(src_id = "regr_classif", dst_id = "classif.rpart", src_channel = 2L, dst_channel = 1L)
 #' g$add_edge(src_id = "classif.rpart", dst_id = "targetinvert", src_channel = 1L, dst_channel = 2L)
 #'
-#' task = mlr_tasks$get("boston_housing")
+#' task = mlr_tasks$get("boston_housing_classic")
 #' task$col_roles$feature = setdiff(task$col_roles$feature, y = "cmedv")
 #' train_out = g$train(task)
 #' expect_r6(g$state$classif.rpart$train_task, classes = "TaskClassif")

diff --git a/tests/testthat/test_pipeop_targettrafoscalerange.R b/tests/testthat/test_pipeop_targettrafoscalerange.R
@@ -7,7 +7,7 @@ test_that("PipeOpTargetTrafoScaleRange - basic properties", {
 
   expect_pipeop(po)
 
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
   train_out1 = po$train(list(task))
 
   x = task$data(cols = "medv")[[1L]]
@@ -50,7 +50,7 @@ test_that("PipeOpTargetTrafoScaleRange - basic properties", {
 test_that("PipeOpTargetTrafoScaleRange - row use subsets", {
   po = PipeOpTargetTrafoScaleRange$new()
 
-  task = mlr_tasks$get("boston_housing")
+  task = mlr_tasks$get("boston_housing_classic")
 
   dat_subset = task$data(1:50)
   x = dat_subset$medv

diff --git a/tests/testthat/test_pipeop_task_preproc.R b/tests/testthat/test_pipeop_task_preproc.R
@@ -27,7 +27,7 @@ test_that("Wrong affect_columns errors", {
       predict_dt = function(dt, levels) dt
     )
   )
-  tsk = tsk("boston_housing")
+  tsk = tsk("boston_housing_classic")
   po = POPP$new("foo", param_vals = list(affect_columns = is.factor))
   expect_pipeop(po)
   expect_error(po$train(list(tsk)), "affected_cols")

diff --git a/tests/testthat/test_pipeop_updatetarget.R b/tests/testthat/test_pipeop_updatetarget.R
@@ -20,13 +20,13 @@ test_that("update target regr to classif", {
   trafo_fun = function(x) {factor(ifelse(x < 25, "<25", ">=25"))}
   pom = PipeOpUpdateTarget$new(param_vals = list(trafo = trafo_fun, new_target_name = "threshold_25", new_task_type = "classif"))
   expect_pipeop(pom)
-  newtsk = pom$train(list(tsk("boston_housing")))[[1]]
+  newtsk = pom$train(list(tsk("boston_housing_classic")))[[1]]
   expect_task(newtsk)
   expect_true("threshold_25" %in% newtsk$target_names)
-  expect_true(all((newtsk$data()$threshold_25 == "<25") == (tsk("boston_housing")$data()$medv < 25)))
+  expect_true(all((newtsk$data()$threshold_25 == "<25") == (tsk("boston_housing_classic")$data()$medv < 25)))
   expect_true(pom$is_trained)
 
-  newtsk2 = pom$predict(list(tsk("boston_housing")))[[1]]
+  newtsk2 = pom$predict(list(tsk("boston_housing_classic")))[[1]]
   expect_task(newtsk2)
   expect_true("threshold_25" %in% newtsk2$target_names)
   expect_true(all(levels(newtsk2$data()$threshold_25) == c("<25", ">=25")))

diff --git a/tests/testthat/test_pipeop_vtreat.R b/tests/testthat/test_pipeop_vtreat.R
@@ -5,7 +5,7 @@ test_that("PipeOpVtreat - basic properties", {
 
   expect_pipeop(PipeOpVtreat$new())
 
-  task_regr = mlr_tasks$get("boston_housing")
+  task_regr = mlr_tasks$get("boston_housing_classic")
   expect_datapreproc_pipeop_class(PipeOpVtreat, task = task_regr, deterministic_train = FALSE, deterministic_predict = FALSE)
 
   task_classiftc = mlr_tasks$get("pima")

diff --git a/tests/testthat/test_selector.R b/tests/testthat/test_selector.R
@@ -2,7 +2,7 @@ context("Selector")
 
 test_that("Selectors work", {
   iris_task = mlr3::mlr_tasks$get("iris")
-  bh_task = mlr3::mlr_tasks$get("boston_housing")
+  bh_task = mlr3::mlr_tasks$get("boston_housing_classic")
   pima_task = mlr3::mlr_tasks$get("pima")
 
   sela = selector_all()