Skip to content

Commit

Permalink
Merge branch 'master' into fix/pkgdown
Browse files Browse the repository at this point in the history
  • Loading branch information
sebffischer committed Jan 23, 2024
2 parents 43b02ab + 044762e commit e31b693
Show file tree
Hide file tree
Showing 32 changed files with 62 additions and 42 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# mlr3pipelines 0.5.0-9000


# mlr3pipelines 0.5.0-2

* Avoid unnecessarily large serializations of `ppl("robustify")` pipelines.
* Made tests and examples compatible with mlr3 update.

# mlr3pipelines 0.5.0-1

* Bugfix: `PipeOpTuneThreshold` was not overloading the correct `.train` and `.predict` functions.
Expand Down
2 changes: 1 addition & 1 deletion R/PipeOpColRoles.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
#'
#' task = tsk("boston_housing")
#' pop = po("colroles", param_vals = list(
#' new_role = list(cmedv = "order")
#' new_role = list(town = c("order", "feature"))
#' ))
#'
#' pop$train(list(task))
Expand Down
1 change: 1 addition & 0 deletions R/PipeOpFilter.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
#' @examples
#' library("mlr3")
#' library("mlr3filters")
#' \dontshow{data.table::setDTthreads(1)}
#'
#' # setup PipeOpFilter to keep the 5 most important
#' # features of the spam task w.r.t. their AUC
Expand Down
1 change: 1 addition & 0 deletions R/PipeOpMissingIndicators.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#' @export
#' @examples
#' library("mlr3")
#' \dontshow{data.table::setDTthreads(1)}
#'
#' task = tsk("pima")$select(c("insulin", "triceps"))
#' sum(complete.cases(task$data()))
Expand Down
1 change: 1 addition & 0 deletions R/PipeOpTuneThreshold.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold",
.optimize_objfun = function(pred) {
optimizer = self$param_set$values$optimizer
if (inherits(optimizer, "character")) optimizer = bbotk::opt(optimizer)
if (inherits(optimizer, "OptimizerGenSA")) optimizer$param_set$values$trace.mat = TRUE # https://github.com/mlr-org/bbotk/issues/214
ps = private$.make_param_set(pred)
measure = self$param_set$values$measure
if (is.character(measure)) measure = msr(measure) else measure
Expand Down
5 changes: 3 additions & 2 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,10 @@ Single computational steps can be represented as so-called **PipeOps**, which ca

## Documentation

The easiest way to get started is reading some of the vignettes that are shipped with the package, which can also be viewed online:
A good way to get into `mlr3pipelines` is to read the following two vignettes:

* [Quick Introduction](https://mlr3book.mlr-org.com/pipelines.html), with short examples to get started
* [Sequential Pipelines](https://mlr3book.mlr-org.com/chapters/chapter7/sequential_pipelines.html)
* [Non-Sequential Pipelines and Tuning](https://mlr3book.mlr-org.com/chapters/chapter8/non-sequential_pipelines_and_tuning.html)

## Bugs, Questions, Feedback

Expand Down
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,10 @@ are:

## Documentation

The easiest way to get started is reading some of the vignettes that are
shipped with the package, which can also be viewed online:
A good way to get into `mlr3pipelines` is to read the following two vignettes:

- [Quick Introduction](https://mlr3book.mlr-org.com/pipelines.html),
with short examples to get started
- [Sequential Pipelines](https://mlr3book.mlr-org.com/chapters/chapter7/sequential_pipelines.html)
- [Non-Sequential Pipelines and Tuning](https://mlr3book.mlr-org.com/chapters/chapter8/non-sequential_pipelines_and_tuning.html)

## Bugs, Questions, Feedback

Expand Down
2 changes: 1 addition & 1 deletion man/mlr_pipeops_colroles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/mlr_pipeops_filter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/mlr_pipeops_missind.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions tests/testthat/setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,12 @@ x$task_types = data.table::setkeyv(rbind(x$task_types, x$task_types["regr", mult

x$task_types = data.table::setkeyv(rbind(x$task_types, x$task_types["classif", mult = "first"][, `:=`(package = "DUMMY", task = "DUMMY")]), "type")

# Register a task generator under the key "boston_housing_classic" in
# mlr3's task dictionary. When called, the generator loads the
# "BostonHousing2" dataset from the mlbench package, wraps it in a data
# backend and returns a regression task with target "medv". As the label
# states, this task is intended for mlr3pipelines tests only.
mlr3::mlr_tasks$add("boston_housing_classic", function(id = "boston_housing_classic") {
b = mlr3::as_data_backend(mlr3misc::load_dataset("BostonHousing2", "mlbench"))
task = mlr3::TaskRegr$new(id, b, target = "medv", label = "Boston Housing Prices (target leakage, for mlr3pipelines tests only)")
# Overwrite the backend's hash with a fixed string after the task is built.
# NOTE(review): presumably this keeps the hash stable across sessions -- confirm.
b$hash = "mlr3pipelines::mlr_tasks_boston_housing_classic"
task
})


# Restrict data.table to a single thread for everything run after this setup file.
data.table::setDTthreads(threads = 1)
8 changes: 4 additions & 4 deletions tests/testthat/test_GraphLearner.R
Original file line number Diff line number Diff line change
Expand Up @@ -450,14 +450,14 @@ test_that("predict() function for Graph", {

lx = as_graph(lrn("regr.rpart"))

lx$train(tsk("boston_housing"))
lx$train(tsk("boston_housing_classic"))

p1 = lx$pipeops$regr.rpart$learner_model$predict(tsk("boston_housing"))
p1 = lx$pipeops$regr.rpart$learner_model$predict(tsk("boston_housing_classic"))

expect_equal(predict(lx, tsk("boston_housing")), p1)
expect_equal(predict(lx, tsk("boston_housing_classic")), p1)

expect_equal(
predict(lx, tsk("boston_housing")$data(cols = tsk("boston_housing")$feature_names)),
predict(lx, tsk("boston_housing_classic")$data(cols = tsk("boston_housing_classic")$feature_names)),
p1$response
)

Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test_learner_weightedaverage.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ test_that("LearnerRegrAvg", {
expect_true(all(is.na(prd$se)))

intask = (pipeline_greplicate(PipeOpLearnerCV$new(lrn("regr.featureless", predict_type = "response")), 3) %>>%
PipeOpFeatureUnion$new())$train(tsk("boston_housing"))[[1]]
PipeOpFeatureUnion$new())$train(tsk("boston_housing_classic"))[[1]]

# Works for accuracy
lrn = LearnerRegrAvg$new()
Expand Down Expand Up @@ -147,7 +147,7 @@ test_that("LearnerClassifAvg Pipeline", {

test_that("LearnerRegrAvg Pipeline", {
skip_on_cran() # takes too long
tsk = mlr_tasks$get("boston_housing")
tsk = mlr_tasks$get("boston_housing_classic")
# Works for response
# TODO: this is a bit of a deep problem: https://github.com/mlr-org/mlr3pipelines/issues/216
## lrn = LearnerRegrAvg$new()
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_mlr_graphs_bagging.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ test_that("Bagging Pipeline", {
expect_graph(p)

# regr
tsk = tsk("boston_housing")
tsk = tsk("boston_housing_classic")
lrn = lrn("regr.rpart")
p = ppl("bagging", graph = po(lrn), iterations = 5L, averager = po("regravg", collect_multiplicity = TRUE))
expect_graph(p)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_mlr_graphs_branching.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ context("ppl - pipeline_branch")

test_that("Branching Pipeline", {
lrns = map(list(lrn("classif.rpart"), lrn("classif.featureless")), po)
task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")
gr = pipeline_branch(lrns)

expect_graph(gr)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_mlr_graphs_robustify.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ test_that("Robustify Pipeline", {
expect_true(all(c("imputehist", "missind") %in% names(p$pipeops)))

# test on mixed, no missings
tsk = tsk("boston_housing")
tsk = tsk("boston_housing_classic")
lrn = lrn("regr.rpart")
p = ppl("robustify", task = tsk, learner = lrn) %>>% po(lrn)
expect_graph(p)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_mlr_graphs_targettrafo.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
context("ppl - pipeline_targettrafo")

test_that("Target Trafo Pipeline", {
task = tsk("boston_housing")
task = tsk("boston_housing_classic")

tt = ppl("targettrafo", graph = PipeOpLearner$new(LearnerRegrRpart$new()))
tt$param_set$values$targetmutate.trafo = function(x) log(x, base = 2)
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test_pipeop_encode.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
context("PipeOpEncode")

test_that("PipeOpEncode", {
task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")

td = task$data()
td$town[1] = NA
td$chas[2] = NA
natask = TaskRegr$new("boston_housing_na", td, task$target_names)
natask = TaskRegr$new("boston_housing_classic_na", td, task$target_names)

check_dat = function(dat) {
expect_true(all(is.na(dat[1, grep("^town\\.", colnames(dat), value = TRUE), with = FALSE])))
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test_pipeop_encodeimpact.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
context("PipeOpEncodeImpact")

test_that("PipeOpEncodeImpact", {
task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")

chaslevels = task$levels()$chas
townlevels = task$levels()$town
Expand Down Expand Up @@ -131,7 +131,7 @@ test_that("PipeOpImpactEncode on Regression", {

selector = as_graph(po("select", selector = selector_type("numeric")))

expect_equal(unname((selector %>>% op)$train(tsk("boston_housing"))), unname(selector$train(tsk("boston_housing"))))
expect_equal(unname((selector %>>% op)$train(tsk("boston_housing_classic"))), unname(selector$train(tsk("boston_housing_classic"))))


op$param_set$values$smoothing = 1e-10
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_pipeop_encodelmer.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ sample_n_letters = function(n, l = 3) {

test_that("PipeOpEncodeLmer regr", {
set.seed(8008135)
task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")
chaslevels = task$levels()$chas
townlevels = task$levels()$town

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_pipeop_filter.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
context("PipeOpFilter")

test_that("PipeOpFilter", {
task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")

expect_datapreproc_pipeop_class(PipeOpFilter,
list(filter = mlr3filters::FilterVariance$new(), param_vals = list(filter.frac = 0.5)), task = task,
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_pipeop_fixfactors.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
context("PipeOpFixFactors")

test_that("PipeOpFixFactors", {
task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")

chaslevels = task$levels()$chas
townlevels = task$levels()$town
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test_pipeop_removeconstants.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
context("PipeOpRemoveConstants")

test_that("PipeOpRemoveConstants - basic properties", {
task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")
task$cbind(data.table(xx = rep(1, 506), yy = rep("a", 506)))

op = PipeOpRemoveConstants$new()
Expand All @@ -13,7 +13,7 @@ test_that("PipeOpRemoveConstants - basic properties", {

test_that("PipeOpRemoveConstants removes expected cols", {

task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")
task$cbind(data.table(xx = rep(1, 506), yy = rep("a", 506),
xx1 = c(2, rep(1, 505)), yy1 = c("b", rep("a", 505))))

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_pipeop_scale.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ test_that("PipeOpScale - basic properties", {
test_that("basic properties", {
expect_datapreproc_pipeop_class(PipeOpScale, task = mlr_tasks$get("iris"))

expect_datapreproc_pipeop_class(PipeOpScale, task = mlr_tasks$get("boston_housing"))
expect_datapreproc_pipeop_class(PipeOpScale, task = mlr_tasks$get("boston_housing_classic"))

expect_datapreproc_pipeop_class(PipeOpScale, task = mlr_tasks$get("pima"))

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_pipeop_select.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@ test_that("select", {
expect_set_equal(po$train(list(tsk("iris")))[[1]]$feature_names, c("Sepal.Length", "Sepal.Width", "Petal.Width"))

po$param_set$values$selector = selector_type("factor")
expect_set_equal(po$train(list(tsk("boston_housing")))[[1]]$feature_names, c("chas", "town"))
expect_set_equal(po$train(list(tsk("boston_housing_classic")))[[1]]$feature_names, c("chas", "town"))

})
2 changes: 1 addition & 1 deletion tests/testthat/test_pipeop_subsample.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ test_that("PipeOpSubsample works unstratified", {
po = PipeOpSubsample$new()
tnew = train_pipeop(po, list(task))

task = mlr_tasks$get("boston_housing")$filter(1L) # actually has to be an int m(
task = mlr_tasks$get("boston_housing_classic")$filter(1L) # actually has to be an int m(
po = PipeOpSubsample$new()
po$param_set$values = list(stratify = TRUE, frac = 0.6)
expect_error(train_pipeop(po, list(task)))
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test_pipeop_targetmutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ test_that("PipeOpTargetMutate - basic properties", {

expect_graph(g)

task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")
task_copy = task$clone(deep = TRUE)
address_in = address(task)
train_out = g$train(task)
Expand Down Expand Up @@ -51,13 +51,13 @@ test_that("PipeOpTargetMutate - log base 2 trafo", {
g$add_edge(src_id = "logtrafo", dst_id = "regr.rpart", src_channel = 2L, dst_channel = 1L)
g$add_edge(src_id = "regr.rpart", dst_id = "targetinvert", src_channel = 1L, dst_channel = 2L)

task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")
train_out = g$train(task)
predict_out = g$predict(task)

dat = task$data()
dat$medv = log(dat$medv, base = 2)
task_log = TaskRegr$new("boston_housing_log", backend = dat, target = "medv")
task_log = TaskRegr$new("boston_housing_classic_log", backend = dat, target = "medv")

learner = LearnerRegrRpart$new()
learner$train(task_log)
Expand All @@ -81,7 +81,7 @@ test_that("PipeOpTargetMutate - log base 2 trafo", {
#' g$add_edge(src_id = "regr_classif", dst_id = "classif.rpart", src_channel = 2L, dst_channel = 1L)
#' g$add_edge(src_id = "classif.rpart", dst_id = "targetinvert", src_channel = 1L, dst_channel = 2L)
#'
#' task = mlr_tasks$get("boston_housing")
#' task = mlr_tasks$get("boston_housing_classic")
#' task$col_roles$feature = setdiff(task$col_roles$feature, y = "cmedv")
#' train_out = g$train(task)
#' expect_r6(g$state$classif.rpart$train_task, classes = "TaskClassif")
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test_pipeop_targettrafoscalerange.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ test_that("PipeOpTargetTrafoScaleRange - basic properties", {

expect_pipeop(po)

task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")
train_out1 = po$train(list(task))

x = task$data(cols = "medv")[[1L]]
Expand Down Expand Up @@ -50,7 +50,7 @@ test_that("PipeOpTargetTrafoScaleRange - basic properties", {
test_that("PipeOpTargetTrafoScaleRange - row use subsets", {
po = PipeOpTargetTrafoScaleRange$new()

task = mlr_tasks$get("boston_housing")
task = mlr_tasks$get("boston_housing_classic")

dat_subset = task$data(1:50)
x = dat_subset$medv
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_pipeop_task_preproc.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ test_that("Wrong affect_columns errors", {
predict_dt = function(dt, levels) dt
)
)
tsk = tsk("boston_housing")
tsk = tsk("boston_housing_classic")
po = POPP$new("foo", param_vals = list(affect_columns = is.factor))
expect_pipeop(po)
expect_error(po$train(list(tsk)), "affected_cols")
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test_pipeop_updatetarget.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ test_that("update target regr to classif", {
trafo_fun = function(x) {factor(ifelse(x < 25, "<25", ">=25"))}
pom = PipeOpUpdateTarget$new(param_vals = list(trafo = trafo_fun, new_target_name = "threshold_25", new_task_type = "classif"))
expect_pipeop(pom)
newtsk = pom$train(list(tsk("boston_housing")))[[1]]
newtsk = pom$train(list(tsk("boston_housing_classic")))[[1]]
expect_task(newtsk)
expect_true("threshold_25" %in% newtsk$target_names)
expect_true(all((newtsk$data()$threshold_25 == "<25") == (tsk("boston_housing")$data()$medv < 25)))
expect_true(all((newtsk$data()$threshold_25 == "<25") == (tsk("boston_housing_classic")$data()$medv < 25)))
expect_true(pom$is_trained)

newtsk2 = pom$predict(list(tsk("boston_housing")))[[1]]
newtsk2 = pom$predict(list(tsk("boston_housing_classic")))[[1]]
expect_task(newtsk2)
expect_true("threshold_25" %in% newtsk2$target_names)
expect_true(all(levels(newtsk2$data()$threshold_25) == c("<25", ">=25")))
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_pipeop_vtreat.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ test_that("PipeOpVtreat - basic properties", {

expect_pipeop(PipeOpVtreat$new())

task_regr = mlr_tasks$get("boston_housing")
task_regr = mlr_tasks$get("boston_housing_classic")
expect_datapreproc_pipeop_class(PipeOpVtreat, task = task_regr, deterministic_train = FALSE, deterministic_predict = FALSE)

task_classiftc = mlr_tasks$get("pima")
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_selector.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ context("Selector")

test_that("Selectors work", {
iris_task = mlr3::mlr_tasks$get("iris")
bh_task = mlr3::mlr_tasks$get("boston_housing")
bh_task = mlr3::mlr_tasks$get("boston_housing_classic")
pima_task = mlr3::mlr_tasks$get("pima")

sela = selector_all()
Expand Down

0 comments on commit e31b693

Please sign in to comment.