diff --git a/DESCRIPTION b/DESCRIPTION index 2bd011e60..8b5b8b982 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mlr3pipelines Title: Preprocessing Operators and Pipelines for 'mlr3' -Version: 0.5.1-9000 +Version: 0.5.2-9000 Authors@R: c(person(given = "Martin", family = "Binder", @@ -86,7 +86,8 @@ Suggests: GenSA, methods, vtreat, - future + future, + htmlwidgets ByteCompile: true Encoding: UTF-8 Config/testthat/edition: 3 @@ -175,6 +176,7 @@ Collate: 'operators.R' 'pipeline_bagging.R' 'pipeline_branch.R' + 'pipeline_convert_types.R' 'pipeline_greplicate.R' 'pipeline_ovr.R' 'pipeline_robustify.R' diff --git a/NAMESPACE b/NAMESPACE index ab287b3d8..0c06c28ab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -127,6 +127,7 @@ export(mlr_graphs) export(mlr_pipeops) export(pipeline_bagging) export(pipeline_branch) +export(pipeline_convert_types) export(pipeline_greplicate) export(pipeline_ovr) export(pipeline_robustify) diff --git a/NEWS.md b/NEWS.md index 1923c3e3a..0e40259de 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,10 @@ -# mlr3pipelines 0.5.1-9000 +# mlr3pipelines 0.5.2-9000 + +# mlr3pipelines 0.5.2 + +* Added new `ppl("convert_types")`. +* Minor documentation fixes. +* Test helpers are now available in `inst/`. These are considered experimental and unstable. 
* Added marshaling support to `GraphLearner` diff --git a/R/GraphLearner.R b/R/GraphLearner.R index db254036a..4bf5c573a 100644 --- a/R/GraphLearner.R +++ b/R/GraphLearner.R @@ -68,6 +68,7 @@ #' @family Learners #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' graph = po("pca") %>>% lrn("classif.rpart") @@ -86,6 +87,7 @@ #' #' # Feature importance (of principal components): #' lr$graph_model$pipeops$classif.rpart$learner_model$importance() +#' \dontshow{ \} } GraphLearner = R6Class("GraphLearner", inherit = Learner, public = list( initialize = function(graph, id = NULL, param_vals = list(), task_type = NULL, predict_type = NULL, clone_graph = TRUE) { diff --git a/R/PipeOpBoxCox.R b/R/PipeOpBoxCox.R index 43924c7f2..45cafc5fc 100644 --- a/R/PipeOpBoxCox.R +++ b/R/PipeOpBoxCox.R @@ -48,6 +48,7 @@ #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. #' #' @examples +#' \dontshow{ if (requireNamespace("bestNormalize")) \{ } #' library("mlr3") #' #' task = tsk("iris") @@ -57,6 +58,7 @@ #' pop$train(list(task))[[1]]$data() #' #' pop$state +#' \dontshow{ \} } #' @family PipeOps #' @include PipeOpTaskPreproc.R #' @template seealso_pipeopslist diff --git a/R/PipeOpClassifAvg.R b/R/PipeOpClassifAvg.R index f9efd0e82..694cdda66 100644 --- a/R/PipeOpClassifAvg.R +++ b/R/PipeOpClassifAvg.R @@ -64,6 +64,7 @@ #' @export #' #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' \donttest{ #' library("mlr3") #' @@ -77,6 +78,7 @@ #' #' resample(tsk("iris"), GraphLearner$new(gr), rsmp("holdout")) #' } +#' \dontshow{ \} } PipeOpClassifAvg = R6Class("PipeOpClassifAvg", inherit = PipeOpEnsemble, public = list( diff --git a/R/PipeOpEncode.R b/R/PipeOpEncode.R index 35e630525..e9fb0b51a 100644 --- a/R/PipeOpEncode.R +++ b/R/PipeOpEncode.R @@ -5,7 +5,7 @@ #' @format [`R6Class`] object inheriting from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. 
#' #' @description -#' Encodes columns of type `factor`, `character` and `ordered`. +#' Encodes columns of type `factor` and `ordered`. #' #' Possible encodings are `"one-hot"` encoding, as well as encoding according to `stats::contr.helmert()`, `stats::contr.poly()`, #' `stats::contr.sum()` and `stats::contr.treatment()`. @@ -14,6 +14,8 @@ #' #' Use the [`PipeOpTaskPreproc`] `$affect_columns` functionality to only encode a subset of columns, or only encode columns of a certain type. #' +#' `character`-type features can be encoded by converting them to `factor` features first, using [`ppl("convert_types", "character", "factor")`][mlr_graphs_convert_types]. +#' #' @section Construction: #' ``` #' PipeOpEncode$new(id = "encode", param_vals = list()) @@ -26,7 +28,7 @@ #' @section Input and Output Channels: #' Input and output channels are inherited from [`PipeOpTaskPreproc`]. #' -#' The output is the input [`Task`][mlr3::Task] with all affected `factor`, `character` or `ordered` parameters encoded according to the `method` +#' The output is the input [`Task`][mlr3::Task] with all affected `factor` and `ordered` parameters encoded according to the `method` #' parameter. 
#' #' @section State: @@ -78,6 +80,14 @@ #' #' poe$param_set$values$method = "sum" #' poe$train(list(task))[[1]]$data() +#' +#' # converting character-columns +#' data_chr = data.table::data.table(x = factor(letters[1:3]), y = letters[1:3]) +#' task_chr = TaskClassif$new("task_chr", data_chr, "x") +#' +#' goe = ppl("convert_types", "character", "factor") %>>% po("encode") +#' +#' goe$train(task_chr)[[1]]$data() PipeOpEncode = R6Class("PipeOpEncode", inherit = PipeOpTaskPreprocSimple, public = list( diff --git a/R/PipeOpEncodeLmer.R b/R/PipeOpEncodeLmer.R index abdb031b0..cacb15ea3 100644 --- a/R/PipeOpEncodeLmer.R +++ b/R/PipeOpEncodeLmer.R @@ -72,6 +72,8 @@ #' @include PipeOpTaskPreproc.R #' @export #' @examples +#' \dontshow{ if (requireNamespace("nloptr")) \{ } +#' \dontshow{ if (requireNamespace("lme4")) \{ } #' library("mlr3") #' poe = po("encodelmer") #' @@ -84,6 +86,8 @@ #' poe$train(list(task))[[1]]$data() #' #' poe$state +#' \dontshow{ \} } +#' \dontshow{ \} } PipeOpEncodeLmer = R6Class("PipeOpEncodeLmer", inherit = PipeOpTaskPreprocSimple, public = list( diff --git a/R/PipeOpFilter.R b/R/PipeOpFilter.R index d8657dce4..321849709 100644 --- a/R/PipeOpFilter.R +++ b/R/PipeOpFilter.R @@ -80,6 +80,8 @@ #' @include PipeOpTaskPreproc.R #' @export #' @examples +#' \dontshow{ if (requireNamespace("mlr3filters")) \{ } +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' library("mlr3filters") #' \dontshow{data.table::setDTthreads(1)} @@ -107,6 +109,8 @@ #' learner = GraphLearner$new(gr) #' rr = resample(task, learner, rsmp("holdout"), store_models = TRUE) #' rr$learners[[1]]$model$auc$scores +#' \dontshow{ \} } +#' \dontshow{ \} } PipeOpFilter = R6Class("PipeOpFilter", inherit = PipeOpTaskPreprocSimple, public = list( diff --git a/R/PipeOpICA.R b/R/PipeOpICA.R index 9f363ed3e..32b500bc2 100644 --- a/R/PipeOpICA.R +++ b/R/PipeOpICA.R @@ -73,6 +73,7 @@ #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. 
#' #' @examples +#' \dontshow{ if (requireNamespace("fastICA")) \{ } #' library("mlr3") #' #' task = tsk("iris") @@ -82,6 +83,7 @@ #' pop$train(list(task))[[1]]$data() #' #' pop$state +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOpTaskPreproc.R diff --git a/R/PipeOpImputeLearner.R b/R/PipeOpImputeLearner.R index 2dd9ef473..e2e4e048c 100644 --- a/R/PipeOpImputeLearner.R +++ b/R/PipeOpImputeLearner.R @@ -70,6 +70,7 @@ #' Only methods inherited from [`PipeOpImpute`]/[`PipeOp`]. #' #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' task = tsk("pima") @@ -93,6 +94,7 @@ #' new_task = po$train(list(task = task))[[1]] #' new_task$missings() #' +#' \dontshow{ \} } #' @family PipeOps #' @family Imputation PipeOps #' @template seealso_pipeopslist diff --git a/R/PipeOpKernelPCA.R b/R/PipeOpKernelPCA.R index 5c24b21d4..0adb4017c 100644 --- a/R/PipeOpKernelPCA.R +++ b/R/PipeOpKernelPCA.R @@ -52,6 +52,7 @@ #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. 
#' #' @examples +#' \dontshow{ if (requireNamespace("kernlab")) \{ } #' library("mlr3") #' #' task = tsk("iris") @@ -59,6 +60,7 @@ #' #' task$data() #' pop$train(list(task))[[1]]$data() +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOpTaskPreproc.R diff --git a/R/PipeOpLearner.R b/R/PipeOpLearner.R index b35949e78..5894dff94 100644 --- a/R/PipeOpLearner.R +++ b/R/PipeOpLearner.R @@ -72,6 +72,7 @@ #' @include PipeOp.R #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' task = tsk("iris") @@ -80,6 +81,7 @@ #' #' lrn_po$train(list(task)) #' lrn_po$predict(list(task)) +#' \dontshow{ \} } PipeOpLearner = R6Class("PipeOpLearner", inherit = PipeOp, public = list( initialize = function(learner, id = NULL, param_vals = list()) { diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index db464aeda..ed7166c70 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -93,6 +93,7 @@ #' @include PipeOpTaskPreproc.R #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' task = tsk("iris") @@ -113,6 +114,7 @@ #' graph$pipeops$classif.rpart$learner$predict_type = "prob" #' #' graph$train(task) +#' \dontshow{ \} } PipeOpLearnerCV = R6Class("PipeOpLearnerCV", inherit = PipeOpTaskPreproc, public = list( diff --git a/R/PipeOpNMF.R b/R/PipeOpNMF.R index bb99d02b1..b6ff08842 100644 --- a/R/PipeOpNMF.R +++ b/R/PipeOpNMF.R @@ -88,6 +88,8 @@ #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. 
#' #' @examples +#' \dontshow{ if (requireNamespace("NMF")) \{ } +#' \dontshow{ if (requireNamespace("MASS")) \{ } #' if (requireNamespace("NMF")) { #' library("mlr3") #' @@ -99,6 +101,8 @@ #' #' pop$state #' } +#' \dontshow{ \} } +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOpTaskPreproc.R diff --git a/R/PipeOpOVR.R b/R/PipeOpOVR.R index e8d7c6e78..814041b75 100644 --- a/R/PipeOpOVR.R +++ b/R/PipeOpOVR.R @@ -68,11 +68,13 @@ #' @include PipeOp.R #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library(mlr3) #' task = tsk("iris") #' po = po("ovrsplit") #' po$train(list(task)) #' po$predict(list(task)) +#' \dontshow{ \} } PipeOpOVRSplit = R6Class("PipeOpOVRSplit", inherit = PipeOp, public = list( @@ -174,6 +176,7 @@ mlr_pipeops$add("ovrsplit", PipeOpOVRSplit) #' @include PipeOpEnsemble.R #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library(mlr3) #' task = tsk("iris") #' gr = po("ovrsplit") %>>% lrn("classif.rpart") %>>% po("ovrunite") @@ -181,6 +184,7 @@ mlr_pipeops$add("ovrsplit", PipeOpOVRSplit) #' gr$predict(task) #' gr$pipeops$classif.rpart$learner$predict_type = "prob" #' gr$predict(task) +#' \dontshow{ \} } PipeOpOVRUnite = R6Class("PipeOpOVRUnite", inherit = PipeOpEnsemble, public = list( diff --git a/R/PipeOpProxy.R b/R/PipeOpProxy.R index 5d7b91236..0a61c1177 100644 --- a/R/PipeOpProxy.R +++ b/R/PipeOpProxy.R @@ -55,6 +55,7 @@ #' Only methods inherited from [`PipeOp`]. 
#' #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' library("mlr3learners") #' @@ -73,6 +74,7 @@ #' g$param_set$values$learner.content = lrn("classif.rpart") #' rr_pca_rpart = resample(task, learner = GraphLearner$new(g), resampling = rsmp("cv", folds = 3)) #' rr_pca_rpart$aggregate(msr("classif.ce")) +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOp.R diff --git a/R/PipeOpRandomResponse.R b/R/PipeOpRandomResponse.R index b883c8858..336f83f6c 100644 --- a/R/PipeOpRandomResponse.R +++ b/R/PipeOpRandomResponse.R @@ -63,6 +63,7 @@ #' @include PipeOp.R #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library(mlr3) #' library(mlr3learners) #' @@ -79,6 +80,7 @@ #' g2$pipeops$regr.lm$learner$predict_type = "se" #' set.seed(2906) #' g2$predict(task2) +#' \dontshow{ \} } PipeOpRandomResponse = R6Class("PipeOpRandomResponse", inherit = PipeOp, public = list( diff --git a/R/PipeOpRegrAvg.R b/R/PipeOpRegrAvg.R index 2d3ca5d16..0aaf1dcea 100644 --- a/R/PipeOpRegrAvg.R +++ b/R/PipeOpRegrAvg.R @@ -57,6 +57,7 @@ #' @include PipeOpEnsemble.R #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' # Simple Bagging @@ -68,6 +69,7 @@ #' po("classifavg") #' #' resample(tsk("iris"), GraphLearner$new(gr), rsmp("holdout")) +#' \dontshow{ \} } PipeOpRegrAvg = R6Class("PipeOpRegrAvg", inherit = PipeOpEnsemble, diff --git a/R/PipeOpSmote.R b/R/PipeOpSmote.R index 9e512bbd0..cdf18a363 100644 --- a/R/PipeOpSmote.R +++ b/R/PipeOpSmote.R @@ -54,6 +54,7 @@ #' @include PipeOpTaskPreproc.R #' @export #' @examples +#' \dontshow{ if (requireNamespace("smotefamily")) \{ } #' library("mlr3") #' #' # Create example task @@ -67,6 +68,7 @@ #' pop = po("smote") #' smotedata = pop$train(list(task))[[1]]$data() #' table(smotedata$result) +#' \dontshow{ \} } PipeOpSmote = R6Class("PipeOpSmote", inherit = PipeOpTaskPreproc, public = list( diff --git 
a/R/PipeOpTextVectorizer.R b/R/PipeOpTextVectorizer.R index 8550fdb22..3a78fd967 100644 --- a/R/PipeOpTextVectorizer.R +++ b/R/PipeOpTextVectorizer.R @@ -140,6 +140,8 @@ #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. #' #' @examples +#' \dontshow{ if (requireNamespace("stopwords")) \{ } +#' \dontshow{ if (requireNamespace("quanteda")) \{ } #' library("mlr3") #' library("data.table") #' # create some text data @@ -157,6 +159,8 @@ #' one_line_of_iris$data() #' #' pos$predict(list(one_line_of_iris))[[1]]$data() +#' \dontshow{ \} } +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOpTaskPreproc.R diff --git a/R/PipeOpThreshold.R b/R/PipeOpThreshold.R index 3f9dae220..18b8f7e74 100644 --- a/R/PipeOpThreshold.R +++ b/R/PipeOpThreshold.R @@ -42,12 +42,14 @@ #' Only methods inherited from [`PipeOp`]. #' #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' t = tsk("german_credit") #' gr = po(lrn("classif.rpart", predict_type = "prob")) %>>% #' po("threshold", param_vals = list(thresholds = 0.9)) #' gr$train(t) #' gr$predict(t) +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOp.R diff --git a/R/PipeOpTrafo.R b/R/PipeOpTrafo.R index 101256cd0..0d360c1ae 100644 --- a/R/PipeOpTrafo.R +++ b/R/PipeOpTrafo.R @@ -309,6 +309,7 @@ mlr_pipeops$add("targetinvert", PipeOpTargetInvert) #' Only methods inherited from [`PipeOpTargetTrafo`]/[`PipeOp`]. 
#' #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library(mlr3) #' task = tsk("boston_housing") #' po = PipeOpTargetMutate$new("logtrafo", param_vals = list( @@ -339,6 +340,7 @@ mlr_pipeops$add("targetinvert", PipeOpTargetInvert) #' tt = ppl("targettrafo", graph = PipeOpLearner$new(LearnerRegrRpart$new())) #' tt$param_set$values$targetmutate.trafo = function(x) log(x, base = 2) #' tt$param_set$values$targetmutate.inverter = function(x) list(response = 2 ^ x$response) +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOp.R @@ -436,6 +438,7 @@ mlr_pipeops$add("targetmutate", PipeOpTargetMutate) #' Only methods inherited from [`PipeOpTargetTrafo`]/[`PipeOp`]. #' #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library(mlr3) #' task = tsk("boston_housing") #' po = PipeOpTargetTrafoScaleRange$new() @@ -449,6 +452,7 @@ mlr_pipeops$add("targetmutate", PipeOpTargetMutate) #' ttscalerange$train(task) #' ttscalerange$predict(task) #' ttscalerange$state$regr.rpart +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOp.R @@ -548,6 +552,7 @@ mlr_pipeops$add("targettrafoscalerange", PipeOpTargetTrafoScaleRange) #' Only methods inherited from [`PipeOp`]. #' #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' \dontrun{ #' # Create a binary class task from iris #' library(mlr3) @@ -556,6 +561,7 @@ mlr_pipeops$add("targettrafoscalerange", PipeOpTargetTrafoScaleRange) #' po$train(list(tsk("iris"))) #' po$predict(list(tsk("iris"))) #' } +#' \dontshow{ \} } #' @family mlr3pipelines backend related #' @family PipeOps #' @template seealso_pipeopslist diff --git a/R/PipeOpTuneThreshold.R b/R/PipeOpTuneThreshold.R index e4891eb6f..f15e4ade8 100644 --- a/R/PipeOpTuneThreshold.R +++ b/R/PipeOpTuneThreshold.R @@ -57,6 +57,8 @@ #' Only methods inherited from [`PipeOp`]. 
#' #' @examples +#' \dontshow{ if (requireNamespace("bbotk")) \{ } +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' task = tsk("iris") @@ -67,6 +69,8 @@ #' pop$train(task) #' #' pop$state +#' \dontshow{ \} } +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @export diff --git a/R/PipeOpVtreat.R b/R/PipeOpVtreat.R index 677fbc7dd..4520d913e 100644 --- a/R/PipeOpVtreat.R +++ b/R/PipeOpVtreat.R @@ -102,6 +102,7 @@ #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. #' #' @examples +#' \dontshow{ if (requireNamespace("vtreat")) \{ } #' library("mlr3") #' #' set.seed(2020) @@ -120,6 +121,7 @@ #' #' pop = PipeOpVtreat$new() #' pop$train(list(task)) +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOpTaskPreproc.R diff --git a/R/PipeOpYeoJohnson.R b/R/PipeOpYeoJohnson.R index 99d309f7c..510107141 100644 --- a/R/PipeOpYeoJohnson.R +++ b/R/PipeOpYeoJohnson.R @@ -50,6 +50,7 @@ #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. 
#' #' @examples +#' \dontshow{ if (requireNamespace("bestNormalize")) \{ } #' library("mlr3") #' #' task = tsk("iris") @@ -59,6 +60,7 @@ #' pop$train(list(task))[[1]]$data() #' #' pop$state +#' \dontshow{ \} } #' @family PipeOps #' @template seealso_pipeopslist #' @include PipeOpTaskPreproc.R diff --git a/R/mlr_graphs.R b/R/mlr_graphs.R index f1103da05..c711ea351 100644 --- a/R/mlr_graphs.R +++ b/R/mlr_graphs.R @@ -25,6 +25,7 @@ #' @family Dictionaries #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library(mlr3) #' lrn = lrn("regr.rpart") #' task = mlr_tasks$get("boston_housing") @@ -38,6 +39,7 @@ #' #' # all Graphs currently in the dictionary: #' as.data.table(mlr_graphs) +#' \dontshow{ \} } mlr_graphs = R6Class("DictionaryGraph", inherit = mlr3misc::Dictionary, cloneable = FALSE, public = list( diff --git a/R/mlr_pipeops.R b/R/mlr_pipeops.R index e498ffd19..80dcea406 100644 --- a/R/mlr_pipeops.R +++ b/R/mlr_pipeops.R @@ -34,6 +34,7 @@ #' @family Dictionaries #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' mlr_pipeops$get("learner", lrn("classif.rpart")) @@ -43,6 +44,7 @@ #' #' # all PipeOps currently in the dictionary: #' as.data.table(mlr_pipeops)[, c("key", "input.num", "output.num", "packages")] +#' \dontshow{ \} } mlr_pipeops = R6Class("DictionaryPipeOp", inherit = mlr3misc::Dictionary, cloneable = FALSE, public = list( diff --git a/R/pipeline_bagging.R b/R/pipeline_bagging.R index 31b743d32..d1ec2cec3 100644 --- a/R/pipeline_bagging.R +++ b/R/pipeline_bagging.R @@ -34,6 +34,7 @@ #' @return [`Graph`] #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' \donttest{ #' library(mlr3) #' lrn_po = po("learner", lrn("regr.rpart")) @@ -46,6 +47,7 @@ #' averager = po("regravg", collect_multiplicity = TRUE)) #' resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() #' } +#' \dontshow{ \} } pipeline_bagging = function(graph, iterations = 10, frac = 0.7, 
averager = NULL, replace = FALSE) { g = as_graph(graph) assert_count(iterations) diff --git a/R/pipeline_convert_types.R b/R/pipeline_convert_types.R new file mode 100644 index 000000000..7b1ec2f75 --- /dev/null +++ b/R/pipeline_convert_types.R @@ -0,0 +1,101 @@ +#' @include mlr_graphs.R + +#' @title Convert Column Types +#' @name mlr_graphs_convert_types +#' @description +#' Converts all columns of type `type_from` to `type_to`, using the corresponding R function (e.g. `as.numeric()`, `as.factor()`). +#' It is possible to further subset the columns that should be affected using the `affect_columns` argument. +#' The resulting [`Graph`] contains a [`PipeOpColApply`], followed, if appropriate, by a [`PipeOpFixFactors`]. +#' +#' Unlike R's `as.factor()` function, `ppl("convert_types")` will convert `ordered` types into (unordered) `factor` vectors. +#' +#' @param type_from `character` \cr +#' Which column types to convert. May be any combination of `"logical"`, `"integer"`, `"numeric"`, `"factor"`, `"ordered"`, `"character"`, or `"POSIXct"`. +#' @param type_to `character(1)` \cr +#' Which type to convert to. Must be a scalar value, exactly one of the types allowed in `type_from`. +#' @param affect_columns `function` | [`Selector`] | `NULL` \cr +#' Which columns to affect. This argument can further restrict the columns being converted, beyond the `type_from` argument. +#' Must be a [`Selector`]-like function, which takes a [`Task`][mlr3::Task] as argument and returns a `character` of features to use. +#' @param id `character(1)` | `NULL` \cr +#' ID to give to the constructed [`PipeOp`]s. +#' Defaults to an ID built automatically from `type_from` and `type_to`. +#' If a [`PipeOpFixFactors`] is appended, its ID will be `paste0(id, "_ff")`. +#' @param fixfactors `logical(1)` | `NULL` \cr +#' Whether to append a [`PipeOpFixFactors`]. Defaults to `TRUE` if and only if `type_to` is `"factor"` or `"ordered"`. 
+#' @param more_args `list` \cr +#' Additional arguments to give to the conversion function. This could e.g. be used to pass the timezone to `as.POSIXct`. +#' +#' @return [`Graph`] +#' @export +#' @examples +#' library("mlr3") +#' +#' data_chr = data.table::data.table( +#' x = factor(letters[1:3]), +#' y = letters[1:3], +#' z = letters[1:3] +#' ) +#' task_chr = TaskClassif$new("task_chr", data_chr, "x") +#' str(task_chr$data()) +#' +#' graph = ppl("convert_types", "character", "factor") +#' str(graph$train(task_chr)[[1]]$data()) +#' +#' graph_z = ppl("convert_types", "character", "factor", +#' affect_columns = selector_name("z")) +#' graph_z$train(task_chr)[[1]]$data() +#' +#' # `affect_columns` and `type_from` are both applied. The following +#' # looks for a 'numeric' column with name 'z', which is not present; +#' # the task is therefore unchanged. +#' graph_z = ppl("convert_types", "numeric", "factor", +#' affect_columns = selector_name("z")) +#' graph_z$train(task_chr)[[1]]$data() +pipeline_convert_types = function(type_from, type_to, affect_columns = NULL, id = NULL, fixfactors = NULL, more_args = list()) { + coltypes = mlr_reflections$task_feature_types + + + assert_character(type_from, any.missing = FALSE, unique = TRUE) + assert_subset(type_from, coltypes) + assert_choice(type_to, coltypes) + assert_function(affect_columns, null.ok = TRUE) + assert_string(id, null.ok = TRUE) + assert_flag(fixfactors, null.ok = TRUE) + assert_list(more_args) + + selector = selector_type(type_from) + if (!is.null(affect_columns)) { + selector = selector_intersect(selector, affect_columns) + } + if (is.null(id)) { + id = sprintf("convert_%s_to_%s", + paste(names(coltypes)[match(type_from, coltypes)], collapse = ""), + names(coltypes)[match(type_to, coltypes)] + ) + } + + converter = switch(type_to, + factor = crate(function(x) { + if (is.ordered(x)) { + cls <- class(x) + class(x) <- cls[cls != "ordered"] + } + as.factor(x) + }), + get(paste0("as.", type_to)) + ) + if 
(length(more_args)) { + converter = crate(function(x) { + mlr3misc::invoke(converter, x = x, .args = more_args) + }, converter, more_args) + } + if (is.null(fixfactors)) { + fixfactors = type_to %in% c("factor", "ordered") + } + po("colapply", + id = id, applicator = converter, affect_columns = selector + ) %>>!% if (fixfactors) po("fixfactors", id = paste0(id, "_ff")) +} + +mlr_graphs$add("convert_types", pipeline_convert_types) + diff --git a/R/pipeline_ovr.R b/R/pipeline_ovr.R index c4c7411fc..fc4c1dc5e 100644 --- a/R/pipeline_ovr.R +++ b/R/pipeline_ovr.R @@ -15,6 +15,7 @@ #' @return [`Graph`] #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' task = tsk("wine") @@ -42,6 +43,7 @@ #' po("classifavg", collect_multiplicity = TRUE) #' g3$train(task) #' g3$predict(task) +#' \dontshow{ \} } pipeline_ovr = function(graph) { PipeOpOVRSplit$new() %>>!% graph %>>!% PipeOpOVRUnite$new() } diff --git a/R/pipeline_robustify.R b/R/pipeline_robustify.R index abb386c4b..fbce8d84a 100644 --- a/R/pipeline_robustify.R +++ b/R/pipeline_robustify.R @@ -60,6 +60,7 @@ #' @return [`Graph`] #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' \donttest{ #' library(mlr3) #' lrn = lrn("regr.rpart") @@ -67,6 +68,7 @@ #' gr = pipeline_robustify(task, lrn) %>>% po("learner", lrn) #' resample(task, GraphLearner$new(gr), rsmp("holdout")) #' } +#' \dontshow{ \} } pipeline_robustify = function(task = NULL, learner = NULL, impute_missings = NULL, factors_to_numeric = NULL, max_cardinality = 1000, ordered_action = "factor", character_action = "factor", POSIXct_action = "numeric") { diff --git a/R/pipeline_stacking.R b/R/pipeline_stacking.R index 4ac1f0a0d..26741e4ac 100644 --- a/R/pipeline_stacking.R +++ b/R/pipeline_stacking.R @@ -26,6 +26,7 @@ #' #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' if (requireNamespace("kknn")) { #' library(mlr3) #' library(mlr3learners) @@ -40,6 +41,7 @@ #' 
graph_learner = as_learner(graph_stack) #' graph_learner$train(tsk("german_credit")) #' } +#' \dontshow{ \} } pipeline_stacking = function(base_learners, super_learner, method = "cv", folds = 3, use_features = TRUE) { assert_learners(base_learners) assert_learner(super_learner) diff --git a/R/pipeline_targettrafo.R b/R/pipeline_targettrafo.R index 72fff3483..5a3989e31 100644 --- a/R/pipeline_targettrafo.R +++ b/R/pipeline_targettrafo.R @@ -25,6 +25,7 @@ #' @return [`Graph`] #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' tt = pipeline_targettrafo(PipeOpLearner$new(LearnerRegrRpart$new())) @@ -46,6 +47,7 @@ #' src_channel = 2, dst_channel = 1) #' g$add_edge(src_id = "regr.rpart", dst_id = "targetinvert", #' src_channel = 1, dst_channel = 2) +#' \dontshow{ \} } pipeline_targettrafo = function(graph, trafo_pipeop = PipeOpTargetMutate$new(), id_prefix = "") { graph = as_graph(graph, clone = TRUE) if (graph$pipeops[[graph$input$op.id]]$innum != 1L) { diff --git a/R/po.R b/R/po.R index e071ffe7b..c60cdaea3 100644 --- a/R/po.R +++ b/R/po.R @@ -32,6 +32,7 @@ #' @return A [`PipeOp`] (for `po()`), or a `list` of [`PipeOp`]s (for `pos()`). #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' po("learner", lrn("classif.rpart"), cp = 0.3) @@ -43,6 +44,7 @@ #' param_vals = list(cp = 0.3)) #' #' pos(c("pca", original = "nop")) +#' \dontshow{ \} } po = function(.obj, ...) { UseMethod("po") } diff --git a/R/ppl.R b/R/ppl.R index 1754fb9de..900dd53a0 100644 --- a/R/ppl.R +++ b/R/ppl.R @@ -18,10 +18,12 @@ #' @return [`Graph`] (for `ppl()`) or `list` of [`Graph`]s (for `ppls()`). #' @export #' @examples +#' \dontshow{ if (requireNamespace("rpart")) \{ } #' library("mlr3") #' #' gr = ppl("bagging", graph = po(lrn("regr.rpart")), #' averager = po("regravg", collect_multiplicity = TRUE)) +#' \dontshow{ \} } ppl = function(.key, ...) 
{ dictionary_sugar_get(dict = mlr_graphs, .key = .key, ...) } diff --git a/attic/dependify.sh b/attic/dependify.sh new file mode 100755 index 000000000..cd67d86df --- /dev/null +++ b/attic/dependify.sh @@ -0,0 +1,32 @@ +#!/bin/bash +if [ -z "$2" ] ; then + echo "Usage: $0 " >&2 + exit 1 +fi +PATPART="$1" +REQUIRE="$2" +# Loop through each .R file in the current directory +for file in *.R; do + # Check if file contains the pattern "^#'.*rpart" + if grep -q "^#'.*${PATPART}" "$file"; then + # Use awk to edit the file in-place + awk ' + # Set a flag when the @examples line is found + /^#'"'"' @examples$/ { + print; + print "#'"'"' \\dontshow{ if (requireNamespace(\"'"${REQUIRE}"'\")) \\{ }"; + found = 1; + next; + } + + # After the @examples line, insert the closing dontshow before the first non-#" or #" @ line + found && (!/^#'"'"'/ || /^#'"'"' @/) { + print "#'"'"' \\dontshow{ \\} }"; + found = 0; + } + + # Print every line of the file + { print; } + ' "$file" > tmp_file && mv tmp_file "$file" + fi +done diff --git a/attic/dependify_tests.sh b/attic/dependify_tests.sh new file mode 100755 index 000000000..481de7a81 --- /dev/null +++ b/attic/dependify_tests.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +if [ -z "$2" ] ; then + echo "Usage: $0 " >&2 + exit 1 +fi +PATPART="$1" +REQUIRE="$2" + +# Loop through all .R files in the current directory +for file in *.R; do + # Use awk to edit the file in-place + awk ' + # Enter a test_that block + /^test_that\(/ { + buffer = $0 # Start buffering the block + capture = 1 # Set flag to capture lines + insert_needed = 0 # Reset insertion flag + next + } + + # Detect the end of a block + /^})$/ && capture { + if (insert_needed) { + # Insert the line at the beginning of the block + sub(/\{/, "{\n skip_if_not_installed(\"'"${REQUIRE}"'\")", buffer) + } + print buffer # Print the modified buffer + print "})" + buffer = "" # Clear buffer + capture = 0 # Stop capturing + next + } + + # While capturing the block + capture { + buffer = buffer "\n" 
$0 # Add line to buffer + # If line contains "PipeOpLrnRP", mark for insertion + if (/'"${PATPART}"'/) + insert_needed = 1 + } + + # Print all lines outside of blocks normally + !capture { + print + } + ' "$file" > tmpfile && mv tmpfile "$file" # Output redirection and file replacement +done + diff --git a/attic/experiments.R b/attic/experiments.R index 40122d84b..e94bfad85 100644 --- a/attic/experiments.R +++ b/attic/experiments.R @@ -10,11 +10,13 @@ data.table::setDTthreads(0) data.table::setDTthreads(1) Sys.setenv(NOT_CRAN = "true") -devtools::document("mlr3pipelines") +devtools::document() # devtools::load_all("paradox") -devtools::load_all("mlr3pipelines") +devtools::load_all() + +devtools::test(filter = "textvectorizer") Sys.setenv(TESTTHAT_CPUS = 20) testthat::test_package("mlr3pipelines") diff --git a/tests/testthat/helper_compat.R b/inst/testthat/helper_compat.R similarity index 100% rename from tests/testthat/helper_compat.R rename to inst/testthat/helper_compat.R diff --git a/tests/testthat/helper_functions.R b/inst/testthat/helper_functions.R similarity index 98% rename from tests/testthat/helper_functions.R rename to inst/testthat/helper_functions.R index 5eb4006b8..2ee53cace 100644 --- a/tests/testthat/helper_functions.R +++ b/inst/testthat/helper_functions.R @@ -233,10 +233,10 @@ expect_datapreproc_pipeop_class = function(poclass, constargs = list(), task, expect_equal(po$innum, 1) expect_equal(po$outnum, 1) - expect_true(are_types_compatible(po$input$train, "Task")) - expect_true(are_types_compatible(po$input$predict, "Task")) - expect_true(are_types_compatible(po$output$train, "Task")) - expect_true(are_types_compatible(po$output$predict, "Task")) + expect_true(mlr3pipelines:::are_types_compatible(po$input$train, "Task")) + expect_true(mlr3pipelines:::are_types_compatible(po$input$predict, "Task")) + expect_true(mlr3pipelines:::are_types_compatible(po$output$train, "Task")) + expect_true(mlr3pipelines:::are_types_compatible(po$output$predict, 
"Task")) expect_error(po$train(list(NULL)), "class.*Task.*but has class") diff --git a/tests/testthat/helper_test_pipeops.R b/inst/testthat/helper_test_pipeops.R similarity index 96% rename from tests/testthat/helper_test_pipeops.R rename to inst/testthat/helper_test_pipeops.R index 5265177bb..7ca37f5cf 100644 --- a/tests/testthat/helper_test_pipeops.R +++ b/inst/testthat/helper_test_pipeops.R @@ -71,7 +71,7 @@ VarargPipeop = R6Class("VarargPipeop", public = list( initialize = function(id = "vararg", innum = 0, param_vals = list()) { super$initialize(id, param_vals = param_vals, - input = data.table(name = c("...", rep_suffix("input", innum)), train = "*", predict = "*"), + input = data.table(name = c("...", mlr3pipelines:::rep_suffix("input", innum)), train = "*", predict = "*"), output = data.table(name = "output", train = "*", predict = "*") ) }), diff --git a/man/mlr_graphs.Rd b/man/mlr_graphs.Rd index 116ed9b07..e11fcf981 100644 --- a/man/mlr_graphs.Rd +++ b/man/mlr_graphs.Rd @@ -35,6 +35,7 @@ Returns a \code{data.table} with column \code{key} (\code{character}). } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library(mlr3) lrn = lrn("regr.rpart") task = mlr_tasks$get("boston_housing") @@ -48,6 +49,7 @@ gr = ppl("robustify", task, lrn) \%>>\% po("learner", lrn) # all Graphs currently in the dictionary: as.data.table(mlr_graphs) +\dontshow{ \} } } \seealso{ Other mlr3pipelines backend related: diff --git a/man/mlr_graphs_bagging.Rd b/man/mlr_graphs_bagging.Rd index 58d5e1e83..790562c66 100644 --- a/man/mlr_graphs_bagging.Rd +++ b/man/mlr_graphs_bagging.Rd @@ -54,6 +54,7 @@ This is done as follows: All input arguments are cloned and have no references in common with the returned \code{\link{Graph}}. 
} \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } \donttest{ library(mlr3) lrn_po = po("learner", lrn("regr.rpart")) @@ -66,4 +67,5 @@ gr = ppl("bagging", lrn_po, frac = 1, replace = TRUE, averager = po("regravg", collect_multiplicity = TRUE)) resample(task, GraphLearner$new(gr), rsmp("holdout"))$aggregate() } +\dontshow{ \} } } diff --git a/man/mlr_graphs_convert_types.Rd b/man/mlr_graphs_convert_types.Rd new file mode 100644 index 000000000..c557c04b9 --- /dev/null +++ b/man/mlr_graphs_convert_types.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_convert_types.R +\name{mlr_graphs_convert_types} +\alias{mlr_graphs_convert_types} +\alias{pipeline_convert_types} +\title{Convert Column Types} +\usage{ +pipeline_convert_types( + type_from, + type_to, + affect_columns = NULL, + id = NULL, + fixfactors = NULL, + more_args = list() +) +} +\arguments{ +\item{type_from}{\code{character} \cr +Which column types to convert. May be any combination of \code{"logical"}, \code{"integer"}, \code{"numeric"}, \code{"factor"}, \code{"ordered"}, \code{"character"}, or \code{"POSIXct"}.} + +\item{type_to}{\code{character(1)} \cr +Which type to convert to. Must be a scalar value, exactly one of the types allowed in \code{type_from}.} + +\item{affect_columns}{\code{function} | \code{\link{Selector}} | \code{NULL} \cr +Which columns to affect. This argument can further restrict the columns being converted, beyond the \code{type_from} argument. +Must be a \code{\link{Selector}}-like function, which takes a \code{\link[mlr3:Task]{Task}} as argument and returns a \code{character} of features to use.} + +\item{id}{\code{character(1)} | \code{NULL} \cr +ID to give to the constructed \code{\link{PipeOp}}s. +Defaults to an ID built automatically from \code{type_from} and \code{type_to}. 
+If a \code{\link{PipeOpFixFactors}} is appended, its ID will be \code{paste0(id, "_ff")}.} + +\item{fixfactors}{\code{logical(1)} | \code{NULL} \cr +Whether to append a \code{\link{PipeOpFixFactors}}. Defaults to \code{TRUE} if and only if \code{type_to} is \code{"factor"} or \code{"ordered"}.} + +\item{more_args}{\code{list} \cr +Additional arguments to give to the conversion function. This could e.g. be used to pass the timezone to \code{as.POSIXct}.} +} +\value{ +\code{\link{Graph}} +} +\description{ +Converts all columns of type \code{type_from} to \code{type_to}, using the corresponding R function (e.g. \code{as.numeric()}, \code{as.factor()}). +It is possible to further subset the columns that should be affected using the \code{affect_columns} argument. +The resulting \code{\link{Graph}} contains a \code{\link{PipeOpColApply}}, followed, if appropriate, by a \code{\link{PipeOpFixFactors}}. + +Unlike R's \code{as.factor()} function, \code{ppl("convert_types")} will convert \code{ordered} types into (unordered) \code{factor} vectors. +} +\examples{ +library("mlr3") + +data_chr = data.table::data.table( + x = factor(letters[1:3]), + y = letters[1:3], + z = letters[1:3] +) +task_chr = TaskClassif$new("task_chr", data_chr, "x") +str(task_chr$data()) + +graph = ppl("convert_types", "character", "factor") +str(graph$train(task_chr)[[1]]$data()) + +graph_z = ppl("convert_types", "character", "factor", + affect_columns = selector_name("z")) +graph_z$train(task_chr)[[1]]$data() + +# `affect_columns` and `type_from` are both applied. The following +# looks for a 'numeric' column with name 'z', which is not present; +# the task is therefore unchanged. +graph_z = ppl("convert_types", "numeric", "factor", + affect_columns = selector_name("z")) +graph_z$train(task_chr)[[1]]$data() +} diff --git a/man/mlr_graphs_ovr.Rd b/man/mlr_graphs_ovr.Rd index eb8064d89..c38449cfa 100644 --- a/man/mlr_graphs_ovr.Rd +++ b/man/mlr_graphs_ovr.Rd @@ -23,6 +23,7 @@ perform "One vs. 
Rest" classification. All input arguments are cloned and have no references in common with the returned \code{\link{Graph}}. } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") task = tsk("wine") @@ -50,4 +51,5 @@ g3 = po("replicate", reps = 3) \%>>\% po("classifavg", collect_multiplicity = TRUE) g3$train(task) g3$predict(task) +\dontshow{ \} } } diff --git a/man/mlr_graphs_robustify.Rd b/man/mlr_graphs_robustify.Rd index 0487f03d0..7845cae3b 100644 --- a/man/mlr_graphs_robustify.Rd +++ b/man/mlr_graphs_robustify.Rd @@ -86,6 +86,7 @@ factor variables, no encoding is performed. All input arguments are cloned and have no references in common with the returned \code{\link{Graph}}. } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } \donttest{ library(mlr3) lrn = lrn("regr.rpart") @@ -93,4 +94,5 @@ task = mlr_tasks$get("boston_housing") gr = pipeline_robustify(task, lrn) \%>>\% po("learner", lrn) resample(task, GraphLearner$new(gr), rsmp("holdout")) } +\dontshow{ \} } } diff --git a/man/mlr_graphs_stacking.Rd b/man/mlr_graphs_stacking.Rd index 1c29e56d3..799ae5ee3 100644 --- a/man/mlr_graphs_stacking.Rd +++ b/man/mlr_graphs_stacking.Rd @@ -43,6 +43,7 @@ features in order to predict the outcome. All input arguments are cloned and have no references in common with the returned \code{\link{Graph}}. 
} \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } if (requireNamespace("kknn")) { library(mlr3) library(mlr3learners) @@ -57,4 +58,5 @@ graph_stack = pipeline_stacking(base_learners, super_learner) graph_learner = as_learner(graph_stack) graph_learner$train(tsk("german_credit")) } +\dontshow{ \} } } diff --git a/man/mlr_graphs_targettrafo.Rd b/man/mlr_graphs_targettrafo.Rd index 847e35b31..7f4664334 100644 --- a/man/mlr_graphs_targettrafo.Rd +++ b/man/mlr_graphs_targettrafo.Rd @@ -40,6 +40,7 @@ parameters \code{trafo} and \code{inverter} of the \code{param_set} of the resul All input arguments are cloned and have no references in common with the returned \code{\link{Graph}}. } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") tt = pipeline_targettrafo(PipeOpLearner$new(LearnerRegrRpart$new())) @@ -61,4 +62,5 @@ g$add_edge(src_id = "targetmutate", dst_id = "regr.rpart", src_channel = 2, dst_channel = 1) g$add_edge(src_id = "regr.rpart", dst_id = "targetinvert", src_channel = 1, dst_channel = 2) +\dontshow{ \} } } diff --git a/man/mlr_learners_graph.Rd b/man/mlr_learners_graph.Rd index e5d52542a..e331c9776 100644 --- a/man/mlr_learners_graph.Rd +++ b/man/mlr_learners_graph.Rd @@ -83,6 +83,7 @@ recommended. 
} \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") graph = po("pca") \%>>\% lrn("classif.rpart") @@ -101,6 +102,7 @@ lr$graph_model$pipeops$classif.rpart$learner_model$model # Feature importance (of principal components): lr$graph_model$pipeops$classif.rpart$learner_model$importance() +\dontshow{ \} } } \seealso{ Other Learners: diff --git a/man/mlr_pipeops.Rd b/man/mlr_pipeops.Rd index aa16b447e..bb4c99164 100644 --- a/man/mlr_pipeops.Rd +++ b/man/mlr_pipeops.Rd @@ -46,6 +46,7 @@ Returns a \code{data.table} with columns \code{key} (\code{character}), \code{pa } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") mlr_pipeops$get("learner", lrn("classif.rpart")) @@ -55,6 +56,7 @@ po("learner", learner = lrn("classif.rpart")) # all PipeOps currently in the dictionary: as.data.table(mlr_pipeops)[, c("key", "input.num", "output.num", "packages")] +\dontshow{ \} } } \seealso{ Other mlr3pipelines backend related: diff --git a/man/mlr_pipeops_boxcox.Rd b/man/mlr_pipeops_boxcox.Rd index 5ab6764c6..6fdb67c46 100644 --- a/man/mlr_pipeops_boxcox.Rd +++ b/man/mlr_pipeops_boxcox.Rd @@ -67,6 +67,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} } \examples{ +\dontshow{ if (requireNamespace("bestNormalize")) \{ } library("mlr3") task = tsk("iris") @@ -76,6 +77,7 @@ task$data() pop$train(list(task))[[1]]$data() pop$state +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_classifavg.Rd b/man/mlr_pipeops_classifavg.Rd index e01d42e9d..3bc15b736 100644 --- a/man/mlr_pipeops_classifavg.Rd +++ b/man/mlr_pipeops_classifavg.Rd @@ -77,6 +77,7 @@ Only methods inherited from \code{\link{PipeOpEnsemble}}/\code{\link{PipeOp}}. 
} \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } \donttest{ library("mlr3") @@ -90,6 +91,7 @@ gr = ppl("greplicate", resample(tsk("iris"), GraphLearner$new(gr), rsmp("holdout")) } +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_encode.Rd b/man/mlr_pipeops_encode.Rd index 6a8c513c5..94f09b863 100644 --- a/man/mlr_pipeops_encode.Rd +++ b/man/mlr_pipeops_encode.Rd @@ -8,7 +8,7 @@ \code{\link{R6Class}} object inheriting from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. } \description{ -Encodes columns of type \code{factor}, \code{character} and \code{ordered}. +Encodes columns of type \code{factor} and \code{ordered}. Possible encodings are \code{"one-hot"} encoding, as well as encoding according to \code{stats::contr.helmert()}, \code{stats::contr.poly()}, \code{stats::contr.sum()} and \code{stats::contr.treatment()}. @@ -16,6 +16,8 @@ Newly created columns are named via pattern \verb{[column-name].[x]} where \code \code{"treatment"} encoding, and an integer sequence otherwise. Use the \code{\link{PipeOpTaskPreproc}} \verb{$affect_columns} functionality to only encode a subset of columns, or only encode columns of a certain type. + +\code{character}-type features can be encoded by converting them to \code{factor} features first, using \code{\link[=mlr_graphs_convert_types]{ppl("convert_types", "character", "factor")}}. } \section{Construction}{ @@ -34,7 +36,7 @@ List of hyperparameter settings, overwriting the hyperparameter settings that wo Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}. -The output is the input \code{\link[mlr3:Task]{Task}} with all affected \code{factor}, \code{character} or \code{ordered} parameters encoded according to the \code{method} +The output is the input \code{\link[mlr3:Task]{Task}} with all affected \code{factor} and \code{ordered} parameters encoded according to the \code{method} parameter. 
} @@ -97,6 +99,14 @@ poe$train(list(task))[[1]]$data() poe$param_set$values$method = "sum" poe$train(list(task))[[1]]$data() + +# converting character-columns +data_chr = data.table::data.table(x = factor(letters[1:3]), y = letters[1:3]) +task_chr = TaskClassif$new("task_chr", data_chr, "x") + +goe = ppl("convert_types", "character", "factor") \%>>\% po("encode") + +goe$train(task_chr)[[1]]$data() } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_encodelmer.Rd b/man/mlr_pipeops_encodelmer.Rd index ca483a8b0..ce9b96575 100644 --- a/man/mlr_pipeops_encodelmer.Rd +++ b/man/mlr_pipeops_encodelmer.Rd @@ -88,6 +88,8 @@ Only methods inherited \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. } \examples{ +\dontshow{ if (requireNamespace("nloptr")) \{ } +\dontshow{ if (requireNamespace("lme4")) \{ } library("mlr3") poe = po("encodelmer") @@ -100,6 +102,8 @@ task = TaskClassif$new("task", poe$train(list(task))[[1]]$data() poe$state +\dontshow{ \} } +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_filter.Rd b/man/mlr_pipeops_filter.Rd index 552e8fee0..650f46504 100644 --- a/man/mlr_pipeops_filter.Rd +++ b/man/mlr_pipeops_filter.Rd @@ -98,6 +98,8 @@ Methods inherited from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpT } \examples{ +\dontshow{ if (requireNamespace("mlr3filters")) \{ } +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") library("mlr3filters") \dontshow{data.table::setDTthreads(1)} @@ -125,6 +127,8 @@ gr = po("filter", filter = flt("auc"), filter.frac = 0.5) \%>>\% learner = GraphLearner$new(gr) rr = resample(task, learner, rsmp("holdout"), store_models = TRUE) rr$learners[[1]]$model$auc$scores +\dontshow{ \} } +\dontshow{ \} } } \references{ Wu Y, Boos DD, Stefanski LA (2007). 
diff --git a/man/mlr_pipeops_ica.Rd b/man/mlr_pipeops_ica.Rd index e17f982fc..f7394979a 100644 --- a/man/mlr_pipeops_ica.Rd +++ b/man/mlr_pipeops_ica.Rd @@ -93,6 +93,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} } \examples{ +\dontshow{ if (requireNamespace("fastICA")) \{ } library("mlr3") task = tsk("iris") @@ -102,6 +103,7 @@ task$data() pop$train(list(task))[[1]]$data() pop$state +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_imputelearner.Rd b/man/mlr_pipeops_imputelearner.Rd index 1a829285f..bfd74292d 100644 --- a/man/mlr_pipeops_imputelearner.Rd +++ b/man/mlr_pipeops_imputelearner.Rd @@ -90,6 +90,7 @@ Only methods inherited from \code{\link{PipeOpImpute}}/\code{\link{PipeOp}}. } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") task = tsk("pima") @@ -113,6 +114,7 @@ po = po("imputelearner", new_task = po$train(list(task = task))[[1]] new_task$missings() +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_kernelpca.Rd b/man/mlr_pipeops_kernelpca.Rd index 99ccd8150..0d969f45b 100644 --- a/man/mlr_pipeops_kernelpca.Rd +++ b/man/mlr_pipeops_kernelpca.Rd @@ -70,6 +70,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} } \examples{ +\dontshow{ if (requireNamespace("kernlab")) \{ } library("mlr3") task = tsk("iris") @@ -77,6 +78,7 @@ pop = po("kernelpca", features = 3) # only keep top 3 components task$data() pop$train(list(task))[[1]]$data() +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_learner.Rd b/man/mlr_pipeops_learner.Rd index a23afe960..41cfc22f2 100644 --- a/man/mlr_pipeops_learner.Rd +++ b/man/mlr_pipeops_learner.Rd @@ -89,6 +89,7 @@ Methods inherited from \code{\link{PipeOp}}. 
} \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") task = tsk("iris") @@ -97,6 +98,7 @@ lrn_po = mlr_pipeops$get("learner", learner) lrn_po$train(list(task)) lrn_po$predict(list(task)) +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_learner_cv.Rd b/man/mlr_pipeops_learner_cv.Rd index 49c852291..b66dbd246 100644 --- a/man/mlr_pipeops_learner_cv.Rd +++ b/man/mlr_pipeops_learner_cv.Rd @@ -112,6 +112,7 @@ Methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") task = tsk("iris") @@ -132,6 +133,7 @@ graph$train(task) graph$pipeops$classif.rpart$learner$predict_type = "prob" graph$train(task) +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index f1c88aeef..7ecab68d5 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -106,6 +106,8 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} } \examples{ +\dontshow{ if (requireNamespace("NMF")) \{ } +\dontshow{ if (requireNamespace("MASS")) \{ } if (requireNamespace("NMF")) { library("mlr3") @@ -117,6 +119,8 @@ pop$train(list(task))[[1]]$data() pop$state } +\dontshow{ \} } +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd index 4e100c041..9ab399f84 100644 --- a/man/mlr_pipeops_ovrsplit.Rd +++ b/man/mlr_pipeops_ovrsplit.Rd @@ -81,11 +81,13 @@ Only methods inherited from \code{\link{PipeOp}}. 
} \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library(mlr3) task = tsk("iris") po = po("ovrsplit") po$train(list(task)) po$predict(list(task)) +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd index ddfabdba8..504030b05 100644 --- a/man/mlr_pipeops_ovrunite.Rd +++ b/man/mlr_pipeops_ovrunite.Rd @@ -74,6 +74,7 @@ Only methods inherited from \code{\link{PipeOpEnsemble}}/\code{\link{PipeOp}}. } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library(mlr3) task = tsk("iris") gr = po("ovrsplit") \%>>\% lrn("classif.rpart") \%>>\% po("ovrunite") @@ -81,6 +82,7 @@ gr$train(task) gr$predict(task) gr$pipeops$classif.rpart$learner$predict_type = "prob" gr$predict(task) +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd index 109a82307..2cd62784b 100644 --- a/man/mlr_pipeops_proxy.Rd +++ b/man/mlr_pipeops_proxy.Rd @@ -74,6 +74,7 @@ Only methods inherited from \code{\link{PipeOp}}. } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") library("mlr3learners") @@ -92,6 +93,7 @@ g$param_set$values$preproc.content = po("pca") g$param_set$values$learner.content = lrn("classif.rpart") rr_pca_rpart = resample(task, learner = GraphLearner$new(g), resampling = rsmp("cv", folds = 3)) rr_pca_rpart$aggregate(msr("classif.ce")) +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_randomresponse.Rd b/man/mlr_pipeops_randomresponse.Rd index 78b211feb..f6ad01ecb 100644 --- a/man/mlr_pipeops_randomresponse.Rd +++ b/man/mlr_pipeops_randomresponse.Rd @@ -79,6 +79,7 @@ Only methods inherited from \code{\link{PipeOp}}. 
} \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library(mlr3) library(mlr3learners) @@ -95,6 +96,7 @@ g2$train(task2) g2$pipeops$regr.lm$learner$predict_type = "se" set.seed(2906) g2$predict(task2) +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd index 95d8b4c7b..fee7ae964 100644 --- a/man/mlr_pipeops_regravg.Rd +++ b/man/mlr_pipeops_regravg.Rd @@ -70,6 +70,7 @@ Only methods inherited from \code{\link{PipeOpEnsemble}}/\code{\link{PipeOp}}. } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") # Simple Bagging @@ -81,6 +82,7 @@ gr = ppl("greplicate", po("classifavg") resample(tsk("iris"), GraphLearner$new(gr), rsmp("holdout")) +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd index ae4035a88..18986b57d 100644 --- a/man/mlr_pipeops_smote.Rd +++ b/man/mlr_pipeops_smote.Rd @@ -65,6 +65,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} } \examples{ +\dontshow{ if (requireNamespace("smotefamily")) \{ } library("mlr3") # Create example task @@ -78,6 +79,7 @@ table(task$data()$result) pop = po("smote") smotedata = pop$train(list(task))[[1]]$data() table(smotedata$result) +\dontshow{ \} } } \references{ Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP (2002). 
diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd index d3cab4420..2d65b74f5 100644 --- a/man/mlr_pipeops_targetmutate.Rd +++ b/man/mlr_pipeops_targetmutate.Rd @@ -78,6 +78,7 @@ Only methods inherited from \code{\link{PipeOpTargetTrafo}}/\code{\link{PipeOp}} } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library(mlr3) task = tsk("boston_housing") po = PipeOpTargetMutate$new("logtrafo", param_vals = list( @@ -108,6 +109,7 @@ g$predict(task) tt = ppl("targettrafo", graph = PipeOpLearner$new(LearnerRegrRpart$new())) tt$param_set$values$targetmutate.trafo = function(x) log(x, base = 2) tt$param_set$values$targetmutate.inverter = function(x) list(response = 2 ^ x$response) +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd index 0cd636479..4979df763 100644 --- a/man/mlr_pipeops_targettrafoscalerange.Rd +++ b/man/mlr_pipeops_targettrafoscalerange.Rd @@ -61,6 +61,7 @@ Only methods inherited from \code{\link{PipeOpTargetTrafo}}/\code{\link{PipeOp}} } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library(mlr3) task = tsk("boston_housing") po = PipeOpTargetTrafoScaleRange$new() @@ -74,6 +75,7 @@ ttscalerange = ppl("targettrafo", trafo_pipeop = PipeOpTargetTrafoScaleRange$new ttscalerange$train(task) ttscalerange$predict(task) ttscalerange$state$regr.rpart +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd index a352669ae..2b4a186c3 100644 --- a/man/mlr_pipeops_textvectorizer.Rd +++ b/man/mlr_pipeops_textvectorizer.Rd @@ -155,6 +155,8 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} } \examples{ +\dontshow{ if (requireNamespace("stopwords")) \{ } +\dontshow{ if (requireNamespace("quanteda")) \{ } library("mlr3") library("data.table") # create some text data @@ -172,6 +174,8 @@ 
one_line_of_iris = task$filter(13) one_line_of_iris$data() pos$predict(list(one_line_of_iris))[[1]]$data() +\dontshow{ \} } +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd index 9df94aa3b..a5a7fef22 100644 --- a/man/mlr_pipeops_threshold.Rd +++ b/man/mlr_pipeops_threshold.Rd @@ -61,12 +61,14 @@ Only methods inherited from \code{\link{PipeOp}}. } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") t = tsk("german_credit") gr = po(lrn("classif.rpart", predict_type = "prob")) \%>>\% po("threshold", param_vals = list(thresholds = 0.9)) gr$train(t) gr$predict(t) +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd index e56953239..683c87f61 100644 --- a/man/mlr_pipeops_tunethreshold.Rd +++ b/man/mlr_pipeops_tunethreshold.Rd @@ -78,6 +78,8 @@ Only methods inherited from \code{\link{PipeOp}}. } \examples{ +\dontshow{ if (requireNamespace("bbotk")) \{ } +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") task = tsk("iris") @@ -88,6 +90,8 @@ task$data() pop$train(task) pop$state +\dontshow{ \} } +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd index e7499b9b8..188199986 100644 --- a/man/mlr_pipeops_updatetarget.Rd +++ b/man/mlr_pipeops_updatetarget.Rd @@ -70,6 +70,7 @@ Only methods inherited from \code{\link{PipeOp}}. 
} \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } \dontrun{ # Create a binary class task from iris library(mlr3) @@ -78,6 +79,7 @@ po = PipeOpUpdateTarget$new(param_vals = list(trafo = trafo_fun, new_target_name po$train(list(tsk("iris"))) po$predict(list(tsk("iris"))) } +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd index 4589fdb3b..e9c26a3b2 100644 --- a/man/mlr_pipeops_vtreat.Rd +++ b/man/mlr_pipeops_vtreat.Rd @@ -122,6 +122,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} } \examples{ +\dontshow{ if (requireNamespace("vtreat")) \{ } library("mlr3") set.seed(2020) @@ -140,6 +141,7 @@ task = TaskRegr$new("vtreat_regr", backend = make_data(100), target = "y") pop = PipeOpVtreat$new() pop$train(list(task)) +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd index 0c4c220f3..1fe4e40fe 100644 --- a/man/mlr_pipeops_yeojohnson.Rd +++ b/man/mlr_pipeops_yeojohnson.Rd @@ -68,6 +68,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} } \examples{ +\dontshow{ if (requireNamespace("bestNormalize")) \{ } library("mlr3") task = tsk("iris") @@ -77,6 +78,7 @@ task$data() pop$train(list(task))[[1]]$data() pop$state +\dontshow{ \} } } \seealso{ https://mlr-org.com/pipeops.html diff --git a/man/po.Rd b/man/po.Rd index 963049501..4f44dd219 100644 --- a/man/po.Rd +++ b/man/po.Rd @@ -48,6 +48,7 @@ it to a \code{\link{PipeOp}}. \code{pos()} (with plural-s) takes either a \code{ list of objects, and creates a \code{list} of \code{\link{PipeOp}}s. 
} \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") po("learner", lrn("classif.rpart"), cp = 0.3) @@ -59,4 +60,5 @@ mlr_pipeops$get("learner", lrn("classif.rpart"), param_vals = list(cp = 0.3)) pos(c("pca", original = "nop")) +\dontshow{ \} } } diff --git a/man/ppl.Rd b/man/ppl.Rd index 713312f33..7a00567ee 100644 --- a/man/ppl.Rd +++ b/man/ppl.Rd @@ -32,8 +32,10 @@ Creates a \code{\link{Graph}} from \code{\link{mlr_graphs}} from given ID vector of any list and returns a \code{list} of possibly muliple \code{\link{Graph}}s. } \examples{ +\dontshow{ if (requireNamespace("rpart")) \{ } library("mlr3") gr = ppl("bagging", graph = po(lrn("regr.rpart")), averager = po("regravg", collect_multiplicity = TRUE)) +\dontshow{ \} } } diff --git a/tests/testthat/helper_mlr3pipelines.R b/tests/testthat/helper_mlr3pipelines.R new file mode 100644 index 000000000..851c20f87 --- /dev/null +++ b/tests/testthat/helper_mlr3pipelines.R @@ -0,0 +1,8 @@ +library("mlr3pipelines") +library("checkmate") +library("testthat") +library("R6") +library("mlr3misc") +library("paradox") + +lapply(list.files(system.file("testthat", package = "mlr3pipelines"), pattern = "^helper.*\\.[rR]", full.names = TRUE), source) diff --git a/tests/testthat/test_Graph.R b/tests/testthat/test_Graph.R index ed6f900d8..6a6004df4 100644 --- a/tests/testthat/test_Graph.R +++ b/tests/testthat/test_Graph.R @@ -1,6 +1,7 @@ context("Graph") test_that("linear graph", { + skip_if_not_installed("rpart") g = Graph$new() expect_equal(g$ids(sorted = TRUE), character(0)) @@ -78,6 +79,7 @@ test_that("complex graph", { "Training debug3 with input list(input_1 = 3, input_2 = 5, input_3 = 5)"), info = paste0("'", lines, "'", collapse = "', '")) + skip_if_not_installed("igraph") pdf(file = NULL) # don't show plot. It is annoying. 
biggraph$plot() dev.off() @@ -149,6 +151,7 @@ test_that("input / output lists and naming", { # output should be debug2.3, debug3.1, debug3.2 # (inputs and outputs in PipeOp order first, in channel order second) + skip_if_not_installed("igraph") pdf(file = NULL) # don't show plot. It is annoying. gr$plot() dev.off() @@ -246,6 +249,7 @@ test_that("Empty Graph", { expect_output(print(Graph$new()), "^Empty Graph\\.$") + skip_if_not_installed("igraph") expect_output(Graph$new()$plot(), "^Empty Graph, not plotting\\.$") expect_equal(gunion(list()), Graph$new()) @@ -377,6 +381,7 @@ test_that("Graph with vararg input", { }) test_that("single pipeop plot", { + skip_if_not_installed("igraph") imp_num = po("imputehist") graph = as_graph(imp_num) diff --git a/tests/testthat/test_GraphLearner.R b/tests/testthat/test_GraphLearner.R index 631aec822..b8d7068b0 100644 --- a/tests/testthat/test_GraphLearner.R +++ b/tests/testthat/test_GraphLearner.R @@ -1,6 +1,7 @@ context("GraphLearner") test_that("basic graphlearner tests", { + skip_if_not_installed("rpart") skip_on_cran() # takes too long task = mlr_tasks$get("iris") @@ -73,6 +74,7 @@ test_that("basic graphlearner tests", { }) test_that("GraphLearner clone_graph FALSE", { + skip_if_not_installed("rpart") # prepare graph gr1 = po("pca") %>>% lrn("classif.rpart") @@ -174,6 +176,7 @@ test_that("graphlearner parameters behave as they should", { }) test_that("graphlearner type inference", { + skip_if_not_installed("rpart") skip_on_cran() # takes too long # default: classif lrn = GraphLearner$new(mlr_pipeops$get("nop")) @@ -242,6 +245,7 @@ test_that("graphlearner type inference", { }) test_that("graphlearner type inference - branched", { + skip_if_not_installed("rpart") skip_on_cran() # takes too long # default: classif @@ -306,6 +310,7 @@ test_that("graphlearner type inference - branched", { }) test_that("graphlearner predict type inference", { + skip_if_not_installed("rpart") skip_on_cran() # takes too long # Getter: @@ -403,6 +408,7 
@@ test_that("graphlearner predict type inference", { test_that("GraphLearner model", { + skip_if_not_installed("rpart") graph = po("pca") %>>% lrn("classif.rpart") graph2 = graph$clone(deep = TRUE) graph_orig = graph$clone(deep = TRUE) @@ -437,6 +443,7 @@ test_that("GraphLearner model", { }) test_that("predict() function for Graph", { + skip_if_not_installed("rpart") lx = as_graph(lrn("classif.rpart")) @@ -465,6 +472,7 @@ test_that("predict() function for Graph", { }) test_that("base_learner() works", { + skip_if_not_installed("rpart") # graph containing single PipeOpLearner x = as_learner(as_graph(lrn("classif.rpart"))) # untrained @@ -517,6 +525,7 @@ test_that("base_learner() works", { test_that("GraphLearner hashes", { + skip_if_not_installed("rpart") learner1 = as_learner(ppl("robustify") %>>% lrn("regr.rpart")) diff --git a/tests/testthat/test_conversion.R b/tests/testthat/test_conversion.R index 5960a4abe..ad77300e4 100644 --- a/tests/testthat/test_conversion.R +++ b/tests/testthat/test_conversion.R @@ -22,6 +22,8 @@ test_that("type conversions in graph creation", { }) test_that("learner conversion in graph creation", { + skip_if_not_installed("rpart") + skip_if_not_installed("rpart") gr1 = Graph$new()$add_pipeop(lrn("classif.rpart")) gr2 = Graph$new()$add_pipeop(LearnerClassifRpart$new()) gr3 = Graph$new()$add_pipeop(mlr_pipeops$get("learner", lrn("classif.rpart"))) @@ -50,6 +52,7 @@ test_that("learner conversion in graph creation", { }) test_that("assertions work", { + skip_if_not_installed("rpart") expect_error(as_pipeop("test")) expect_error(assert_pipeop(lrn("classif.rpart"))) @@ -90,6 +93,7 @@ test_that("auto-gunion", { }) test_that("po for Filter", { + skip_if_not_installed("mlr3filters") flt = mlr3filters::FilterVariance$new() flt$param_set$values$na.rm = TRUE @@ -105,6 +109,7 @@ test_that("po for Filter", { }) test_that("po for Learner", { + skip_if_not_installed("rpart") lrn = LearnerClassifRpart$new() lrn$param_set$values$xval = 9 @@ -120,6 +125,7 
@@ test_that("po for Learner", { }) test_that("Graph to GraphLearner", { + skip_if_not_installed("rpart") grph = po("pca") %>>% po(lrn("classif.rpart")) @@ -142,6 +148,7 @@ test_that("Graph to GraphLearner", { }) test_that("PipeOp to GraphLearner", { + skip_if_not_installed("rpart") po = po("proxy", param_vals = list(content = lrn("classif.rpart"))) diff --git a/tests/testthat/test_dictionary.R b/tests/testthat/test_dictionary.R index 1a598668c..a35a902fa 100644 --- a/tests/testthat/test_dictionary.R +++ b/tests/testthat/test_dictionary.R @@ -2,6 +2,8 @@ context("Dictionary") # we check that all pipeops that are exported are also in the dictionary, and can be constructed from there. test_that("Dictionary contains all PipeOps", { + skip_if_not_installed("mlr3filters") + skip_if_not_installed("rpart") skip_on_cran() oldwarn = options(warn = 2) @@ -80,6 +82,7 @@ test_that("Dictionary contains all PipeOps", { # the loop now checks whether we can construct each pipeop from the dictionary *and* by itself for (idx in seq_along(dictnames)) { + if (dictnames[[idx]] == "filter") next # TODO: remove this when https://github.com/mlr-org/mlr3filters/issues/162 is solved pogen = get(pipeops[idx], pkgenv) # the constructor, as found in the package namespace dictname = dictnames[idx] # the "key" in the mlr_pipeops dictionary @@ -217,6 +220,7 @@ test_that("data.table of pipeops looks as it should", { }) test_that("GraphLearner is in mlr_learners", { + skip_if_not_installed("rpart") expect_data_table(as.data.table(mlr_learners)) # can construct mlr_learners table diff --git a/tests/testthat/test_learner_weightedaverage.R b/tests/testthat/test_learner_weightedaverage.R index 560e9fa8e..dc77bd815 100644 --- a/tests/testthat/test_learner_weightedaverage.R +++ b/tests/testthat/test_learner_weightedaverage.R @@ -1,6 +1,7 @@ context("WeightedAverage Learner") test_that("LearnerClassifAvg", { + skip_if_not_installed("rpart") skip_on_cran() # takes too long lrn = LearnerClassifAvg$new() 
expect_learner(lrn) @@ -100,6 +101,7 @@ test_that("LearnerRegrAvg", { }) test_that("LearnerClassifAvg Pipeline", { + skip_if_not_installed("rpart") skip_on_cran() # takes too long tsk = mlr_tasks$get("iris") # Works for response @@ -146,6 +148,7 @@ test_that("LearnerClassifAvg Pipeline", { }) test_that("LearnerRegrAvg Pipeline", { + skip_if_not_installed("rpart") skip_on_cran() # takes too long tsk = mlr_tasks$get("boston_housing_classic") # Works for response diff --git a/tests/testthat/test_mlr_graphs_bagging.R b/tests/testthat/test_mlr_graphs_bagging.R index 3cd5bee95..14836b705 100644 --- a/tests/testthat/test_mlr_graphs_bagging.R +++ b/tests/testthat/test_mlr_graphs_bagging.R @@ -1,6 +1,7 @@ context("ppl - pipeline_bagging") test_that("Bagging Pipeline", { + skip_if_not_installed("rpart") skip_on_cran() # takes too long expect_error(ppl("bagging", graph = lrn("classif.rpart"), averager = po("classifavg", collect_multiplicity = FALSE)), @@ -39,6 +40,7 @@ test_that("Bagging Pipeline", { }) test_that("Bagging with replacement", { + skip_if_not_installed("rpart") tsk = tsk("iris") lrn = lrn("classif.rpart") p = ppl("bagging", graph = po(lrn), replace = TRUE, averager = po("classifavg", collect_multiplicity = TRUE)) diff --git a/tests/testthat/test_mlr_graphs_branching.R b/tests/testthat/test_mlr_graphs_branching.R index 92f635d59..e76a55473 100644 --- a/tests/testthat/test_mlr_graphs_branching.R +++ b/tests/testthat/test_mlr_graphs_branching.R @@ -1,6 +1,7 @@ context("ppl - pipeline_branch") test_that("Branching Pipeline", { + skip_if_not_installed("rpart") lrns = map(list(lrn("classif.rpart"), lrn("classif.featureless")), po) task = mlr_tasks$get("boston_housing_classic") gr = pipeline_branch(lrns) diff --git a/tests/testthat/test_mlr_graphs_convert_types.R b/tests/testthat/test_mlr_graphs_convert_types.R new file mode 100644 index 000000000..e75f0fc0c --- /dev/null +++ b/tests/testthat/test_mlr_graphs_convert_types.R @@ -0,0 +1,159 @@ +context("ppl - 
pipeline_convert_types") + +test_that("general tests", { + + data_chr = data.table::data.table( + x = factor(letters[1:3]), + y = letters[1:3], + z = letters[1:3] + ) + + data_fct = data.table::data.table( + x = factor(letters[1:3]), + y = factor(letters[1:3]), + z = factor(letters[1:3]) + ) + + data_ord = data.table::data.table( + x = factor(letters[1:3]), + y = ordered(letters[1:3]), + z = ordered(letters[1:3]) + ) + + task_chr = TaskClassif$new("task_chr", data_chr, "x") + task_fct = TaskClassif$new("task_fct", data_fct, "x") + task_ord = TaskClassif$new("task_ord", data_ord, "x") + + graph = ppl("convert_types", "character", "factor") + expect_equal(graph$train(task_chr)[[1]]$data(), data_fct) + + graph = ppl("convert_types", c("ordered", "character"), "factor") + expect_equal(graph$train(task_chr)[[1]]$data(), data_fct) + expect_equal(graph$train(task_ord)[[1]]$data(), data_fct) + + graph_z = ppl("convert_types", "character", "factor", + affect_columns = selector_name("z")) + expect_equal(graph_z$train(task_chr)[[1]]$data()[, c("x", "z", "y")], cbind(data_fct[, c("x", "z")], data_chr[, "y"])) + + graph_z = ppl("convert_types", "numeric", "factor", + affect_columns = selector_name("z")) + expect_equal(graph_z$train(task_chr)[[1]]$data(), data_chr) + + graph_z = ppl("convert_types", "ordered", "factor", + affect_columns = selector_name("z")) + expect_equal(graph_z$train(task_ord)[[1]]$data()[, c("x", "z", "y")], cbind(data_fct[, c("x", "z")], data_ord[, "y"])) + + expect_equal(graph_z$train(task_fct)[[1]]$data()[, c("x", "z", "y")], data_fct[, c("x", "z", "y")]) + expect_equal(graph_z$train(task_chr)[[1]]$data()[, c("x", "z", "y")], data_chr[, c("x", "z", "y")]) + +}) + +test_that("more_args", { + data_time = data.table::data.table( + x = factor(letters[1:3]), + y = paste0("2024-02-01 00:00:0", 1:3) + ) + + data_time_weird = data.table::data.table( + x = factor(letters[1:3]), + y = paste0("02/2024/01 00:00:0", 1:3) + ) + + task_time = 
TaskClassif$new("task_time", data_time, "x") + task_time_weird = TaskClassif$new("task_time_weird", data_time_weird, "x") + + expect_equal( + ppl("convert_types", "character", "POSIXct")$train(task_time)[[1]]$data()[, y], + as.POSIXct(data_time$y) + ) + + expect_equal( + ppl("convert_types", "character", "POSIXct", + more_args = list(format = "%m/%Y/%d %H:%M:%OS") + )$train(task_time_weird)[[1]]$data()[, y], + as.POSIXct(data_time$y) + ) +}) + +test_that("fixfactors", { + + data_chr = data.table::data.table( + x = factor(letters[1:3]), + y = letters[1:3], + z = letters[1:3] + ) + + data_fct = data.table::data.table( + x = factor(letters[1:3]), + y = factor(letters[1:3]), + z = factor(letters[1:3]) + ) + task_chr = TaskClassif$new("task_chr", data_chr, "x") + task_fct = TaskClassif$new("task_fct", data_fct, "x") + + nadata <- data.table(y = factor(c("a", "b", NA)), z = factor(c("a", "b", NA))) + + + graph = ppl("convert_types", "character", "factor") + + graph$train(task_chr$clone(deep = TRUE)$filter(1:2)) + expect_equal(graph$predict(task_chr)[[1]]$data()[, c("y", "z")], nadata) + + graph$train(task_fct$clone(deep = TRUE)$filter(1:2)) + expect_equal(graph$predict(task_fct)[[1]]$data()[, c("y", "z")], nadata) + + graph = ppl("convert_types", "character", "numeric") + + graph$train(task_fct$clone(deep = TRUE)$filter(1:2)) + expect_equal(graph$predict(task_fct)[[1]]$data()[, c("y", "z")], data_fct[, c("y", "z")]) + + graph = ppl("convert_types", "character", "numeric", fixfactors = TRUE) + graph$train(task_fct$clone(deep = TRUE)$filter(1:2)) + expect_equal(graph$predict(task_fct)[[1]]$data()[, c("y", "z")], nadata) + + graph = ppl("convert_types", "character", "factor", fixfactors = FALSE) + + graph$train(task_chr$clone(deep = TRUE)$filter(1:2)) + expect_equal(graph$predict(task_chr)[[1]]$data()[, c("y", "z")], data_fct[, c("y", "z")]) + + + graph = ppl("convert_types", "character", "factor") + graph$train(task_chr$clone(deep = TRUE)$filter(1:2)) + + graph_z = 
ppl("convert_types", "character", "factor", + affect_columns = selector_name("z")) + graph_z$train(task_chr$clone(deep = TRUE)$filter(1:2)) + + expect_equal(graph_z$predict(task_chr)[[1]]$data()[, c("y", "z")], cbind(data_chr[, "y"], nadata[, "z"])) + +}) + + +test_that("id", { + + expect_equal( + ppl("convert_types", "character", "factor")$ids(), + c("convert_chr_to_fct", "convert_chr_to_fct_ff") + ) + + expect_equal( + ppl("convert_types", c("character", "POSIXct"), "factor")$ids(), + c("convert_chrpxc_to_fct", "convert_chrpxc_to_fct_ff") + ) + + expect_equal( + ppl("convert_types", c("character", "POSIXct"), "factor", fixfactors = FALSE)$ids(), + "convert_chrpxc_to_fct" + ) + + expect_equal( + ppl("convert_types", c("character", "POSIXct"), "factor", id = "abc")$ids(), + c("abc", "abc_ff") + ) + + expect_equal( + ppl("convert_types", c("character", "POSIXct"), "factor", id = "abc", fixfactors = FALSE)$ids(), + "abc" + ) + +}) diff --git a/tests/testthat/test_mlr_graphs_ovr.R b/tests/testthat/test_mlr_graphs_ovr.R index 71663a3e7..8ba4df4d6 100644 --- a/tests/testthat/test_mlr_graphs_ovr.R +++ b/tests/testthat/test_mlr_graphs_ovr.R @@ -1,6 +1,7 @@ context("ppl - pipeline_ovr") test_that("OVR Pipeline", { + skip_if_not_installed("rpart") task = tsk("wine") # assertions on graph diff --git a/tests/testthat/test_mlr_graphs_robustify.R b/tests/testthat/test_mlr_graphs_robustify.R index b412990bd..8720ca0af 100644 --- a/tests/testthat/test_mlr_graphs_robustify.R +++ b/tests/testthat/test_mlr_graphs_robustify.R @@ -1,6 +1,7 @@ context("ppl - pipeline_robustify") test_that("Robustify Pipeline", { + skip_if_not_installed("rpart") skip_on_cran() lrn = lrn("classif.rpart") @@ -103,6 +104,7 @@ test_that("Robustify Pipeline", { test_that("Robustify Pipeline Impute Missings", { + skip_if_not_installed("rpart") tmissings = tsk("pima") tnomissings = tsk("iris") @@ -151,6 +153,7 @@ makeTypeTask = function(types) { test_that("Robustify Pipeline factor to numeric", { + 
skip_if_not_installed("rpart") alltask = makeTypeTask(c("integer", "numeric", "logical", "character", "POSIXct")) diff --git a/tests/testthat/test_mlr_graphs_stacking.R b/tests/testthat/test_mlr_graphs_stacking.R index 8cd0d6735..c11eee099 100644 --- a/tests/testthat/test_mlr_graphs_stacking.R +++ b/tests/testthat/test_mlr_graphs_stacking.R @@ -1,6 +1,7 @@ context("ppl - pipeline_stacking") test_that("Stacking Pipeline", { + skip_if_not_installed("rpart") base_learners = list( lrn("classif.rpart", predict_type = "prob", id = "base.rpart") diff --git a/tests/testthat/test_mlr_graphs_targettrafo.R b/tests/testthat/test_mlr_graphs_targettrafo.R index fcc40fec4..9ddd32f95 100644 --- a/tests/testthat/test_mlr_graphs_targettrafo.R +++ b/tests/testthat/test_mlr_graphs_targettrafo.R @@ -1,6 +1,7 @@ context("ppl - pipeline_targettrafo") test_that("Target Trafo Pipeline", { + skip_if_not_installed("rpart") task = tsk("boston_housing_classic") tt = ppl("targettrafo", graph = PipeOpLearner$new(LearnerRegrRpart$new())) @@ -45,6 +46,7 @@ test_that("Target Trafo Pipeline", { }) test_that("More Complex Target Trafo Pipelines", { + skip_if_not_installed("rpart") task = tsk("mtcars") tt = pipeline_targettrafo((po("select") %>>% ppl("branch", list(lrn("regr.featureless"), lrn("regr.rpart"))))) expect_equal(tt$input$op.id, "targetmutate") diff --git a/tests/testthat/test_multichannels.R b/tests/testthat/test_multichannels.R index 7d2c2c00e..f0a1a0730 100644 --- a/tests/testthat/test_multichannels.R +++ b/tests/testthat/test_multichannels.R @@ -8,6 +8,7 @@ test_that("adding multiple edges to output channels works", { expect_output(print(graph), c("scale.*subsample,pca.*\n.*subsample.*scale.*\n.*pca.*scale")) + skip_if_not_installed("igraph") pdf(file = NULL) # don't show plot. It is annoying. 
graph$plot() dev.off() @@ -96,6 +97,7 @@ test_that("adding multiple edges to vararg input channel works", { expect_output(print(graph), c("scale.*vararg.*\n.*pca.*vararg.*\n.*vararg.*scale,pca")) + skip_if_not_installed("igraph") pdf(file = NULL) # don't show plot. It is annoying. graph$plot() dev.off() diff --git a/tests/testthat/test_multiplicities.R b/tests/testthat/test_multiplicities.R index a938ce1ca..cbb362fd7 100644 --- a/tests/testthat/test_multiplicities.R +++ b/tests/testthat/test_multiplicities.R @@ -116,6 +116,7 @@ test_that("PipeOp - evaluate_multiplicities", { }) test_that("Graph - add_edge", { + skip_if_not_installed("rpart") learner = lrn("classif.rpart") g1 = PipeOpOVRSplit$new() %>>% learner %>>% PipeOpOVRUnite$new() g2 = Graph$new() diff --git a/tests/testthat/test_pipeop_boxcox.R b/tests/testthat/test_pipeop_boxcox.R index 4216bdd5c..63a2547d8 100644 --- a/tests/testthat/test_pipeop_boxcox.R +++ b/tests/testthat/test_pipeop_boxcox.R @@ -1,6 +1,7 @@ context("PipeOpBoxCox") test_that("PipeOpBoxCox - general functionality", { + skip_if_not_installed("bestNormalize") task = mlr_tasks$get("iris") op = PipeOpBoxCox$new() expect_pipeop(op) @@ -15,6 +16,7 @@ test_that("PipeOpBoxCox - general functionality", { }) test_that("PipeOpBoxCox - receive expected result", { + skip_if_not_installed("bestNormalize") task = mlr_tasks$get("iris") op = PipeOpBoxCox$new(param_vals = list(standardize = FALSE)) result = train_pipeop(op, inputs = list(task)) diff --git a/tests/testthat/test_pipeop_encodelmer.R b/tests/testthat/test_pipeop_encodelmer.R index 1fabbc454..8ec4b297a 100644 --- a/tests/testthat/test_pipeop_encodelmer.R +++ b/tests/testthat/test_pipeop_encodelmer.R @@ -5,6 +5,8 @@ sample_n_letters = function(n, l = 3) { } test_that("PipeOpEncodeLmer regr", { + skip_if_not_installed("nloptr") + skip_if_not_installed("lme4") set.seed(8008135) task = mlr_tasks$get("boston_housing_classic") chaslevels = task$levels()$chas @@ -25,6 +27,8 @@ 
test_that("PipeOpEncodeLmer regr", { test_that("PipeOpEncodeLmer multi and binaryclass", { + skip_if_not_installed("nloptr") + skip_if_not_installed("lme4") set.seed(8008135) # Multiclass @@ -68,6 +72,8 @@ test_that("PipeOpEncodeLmer multi and binaryclass", { }) test_that("PipeOpEncodeLmer Edge Cases", { + skip_if_not_installed("nloptr") + skip_if_not_installed("lme4") set.seed(8008135) task = mlr3::TaskClassif$new("task", data.table::data.table(x = sample_n_letters(10, 2), y = 1:10, z = 1:10), "x") @@ -89,6 +95,8 @@ test_that("PipeOpEncodeLmer Edge Cases", { test_that("Confirms to sensible values", { + skip_if_not_installed("nloptr") + skip_if_not_installed("lme4") logit = function(x) {exp(-x) / (1+exp(-x))} data = data.table::data.table(y = factor(sample_n_letters(200, 2))) diff --git a/tests/testthat/test_pipeop_featureunion.R b/tests/testthat/test_pipeop_featureunion.R index f408fb196..50348a091 100644 --- a/tests/testthat/test_pipeop_featureunion.R +++ b/tests/testthat/test_pipeop_featureunion.R @@ -49,6 +49,7 @@ test_that("PipeOpFeatureUnion - train and predict", { }) test_that("PipeOpFeatureUnion - train and predict II", { + skip_if_not_installed("rpart") # Define PipeOp's scatter = PipeOpCopy$new(2) op2a = PipeOpPCA$new() @@ -129,6 +130,7 @@ test_that("PipeOpFeatureUnion - levels are preserved", { }) test_that("feature renaming", { + skip_if_not_installed("rpart") expect_pipeop_class(PipeOpFeatureUnion, list(letters[1:3])) expect_equal(nrow(PipeOpFeatureUnion$new(c("a", "b", "c"))$input), 3) diff --git a/tests/testthat/test_pipeop_filter.R b/tests/testthat/test_pipeop_filter.R index a02239fc5..b4ecee96a 100644 --- a/tests/testthat/test_pipeop_filter.R +++ b/tests/testthat/test_pipeop_filter.R @@ -1,6 +1,7 @@ context("PipeOpFilter") test_that("PipeOpFilter", { + skip_if_not_installed("mlr3filters") task = mlr_tasks$get("boston_housing_classic") expect_datapreproc_pipeop_class(PipeOpFilter, @@ -46,6 +47,7 @@ test_that("PipeOpFilter", { test_that("PipeOpFilter 
parameters", { + skip_if_not_installed("mlr3filters") po = PipeOpFilter$new(mlr3filters::FilterVariance$new()) @@ -64,6 +66,7 @@ test_that("PipeOpFilter parameters", { test_that("PipeFilter permuted", { + skip_if_not_installed("mlr3filters") set.seed(1) N = 50 task = tgen("2dnormals")$generate(N) diff --git a/tests/testthat/test_pipeop_ica.R b/tests/testthat/test_pipeop_ica.R index c93237873..73eaebb54 100644 --- a/tests/testthat/test_pipeop_ica.R +++ b/tests/testthat/test_pipeop_ica.R @@ -1,6 +1,7 @@ context("PipeOpICA") test_that("PipeOpICA - basic properties", { + skip_if_not_installed("fastICA") task = mlr_tasks$get("iris") expect_datapreproc_pipeop_class(PipeOpICA, task = task, @@ -16,6 +17,7 @@ test_that("PipeOpICA - basic properties", { }) test_that("PipeOpICA - compare to fastICA", { + skip_if_not_installed("fastICA") # Default parameters task = mlr_tasks$get("iris") op = PipeOpICA$new() diff --git a/tests/testthat/test_pipeop_imputelearner.R b/tests/testthat/test_pipeop_imputelearner.R index a443605c2..cf8a37705 100644 --- a/tests/testthat/test_pipeop_imputelearner.R +++ b/tests/testthat/test_pipeop_imputelearner.R @@ -1,6 +1,7 @@ context("PipeOpImputeLearner") test_that("PipeOpImputeLearner - simple tests", { + skip_if_not_installed("rpart") # Pima has several missings task = mlr_tasks$get("pima") po = PipeOpImputeLearner$new(learner = lrn("regr.rpart")) @@ -37,6 +38,7 @@ test_that("PipeOpImputeLearner - simple tests", { }) test_that("PipeOpImputeLearner", { + skip_if_not_installed("rpart") skip_on_cran() # slow test, so we don't do it on cran task = mlr_tasks$get("pima") diff --git a/tests/testthat/test_pipeop_learner.R b/tests/testthat/test_pipeop_learner.R index 048d46b58..9b45b707d 100644 --- a/tests/testthat/test_pipeop_learner.R +++ b/tests/testthat/test_pipeop_learner.R @@ -19,6 +19,7 @@ test_that("PipeOpLearner - basic properties", { }) test_that("PipeOpLearner - param_set and values", { + skip_if_not_installed("rpart") lrn = 
mlr_learners$get("classif.rpart") po = PipeOpLearner$new(lrn) @@ -55,6 +56,7 @@ test_that("PipeOpLearner - param_set and values", { }) test_that("PipeOpLearner - graph but no id", { + skip_if_not_installed("rpart") g = PipeOpNOP$new() %>>% PipeOpLearner$new(LearnerClassifRpart$new()) po = PipeOpLearner$new(g) expect_string(po$id) @@ -84,6 +86,8 @@ test_that("PipeOpLearner - model active binding to state", { }) test_that("packages", { + skip_if_not_installed("rpart") + expect_set_equal( c("mlr3pipelines", lrn("classif.rpart")$packages), po("learner", learner = lrn("classif.rpart"))$packages diff --git a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R index 3248925b0..83577295f 100644 --- a/tests/testthat/test_pipeop_learnercv.R +++ b/tests/testthat/test_pipeop_learnercv.R @@ -39,6 +39,7 @@ test_that("PipeOpLearnerCV - basic properties", { }) test_that("PipeOpLearnerCV - param values", { + skip_if_not_installed("rpart") lrn = mlr_learners$get("classif.rpart") polrn = PipeOpLearnerCV$new(lrn) expect_subset(c("minsplit", "resampling.method", "resampling.folds"), polrn$param_set$ids()) @@ -50,6 +51,7 @@ test_that("PipeOpLearnerCV - param values", { }) test_that("PipeOpLearnerCV - within resampling", { + skip_if_not_installed("rpart") lrn = mlr_learners$get("classif.rpart") gr = GraphLearner$new(PipeOpLearnerCV$new(lrn) %>>% po(id = "l2", lrn)) rr = resample(tsk("iris"), gr, rsmp("holdout")) @@ -57,6 +59,7 @@ test_that("PipeOpLearnerCV - within resampling", { }) test_that("PipeOpLearnerCV - insample resampling", { + skip_if_not_installed("rpart") lrn = mlr_learners$get("classif.featureless") iris_with_unambiguous_mode = mlr_tasks$get("iris")$filter(c(1:49, 52:150)) # want featureless learner without randomness @@ -72,6 +75,7 @@ test_that("PipeOpLearnerCV - insample resampling", { }) test_that("PipeOpLearnerCV - graph but no id", { + skip_if_not_installed("rpart") g = PipeOpNOP$new() %>>% PipeOpLearner$new(LearnerClassifRpart$new()) po = 
PipeOpLearnerCV$new(g) expect_string(po$id) @@ -101,6 +105,7 @@ test_that("PipeOpLearnerCV - model active binding to state", { }) test_that("predict_type", { + skip_if_not_installed("rpart") expect_equal(po("learner_cv", lrn("classif.rpart", predict_type = "response"))$predict_type, "response") expect_equal(po("learner_cv", lrn("classif.rpart", predict_type = "prob"))$predict_type, "prob") diff --git a/tests/testthat/test_pipeop_missind.R b/tests/testthat/test_pipeop_missind.R index 320484cb6..5754e8c24 100644 --- a/tests/testthat/test_pipeop_missind.R +++ b/tests/testthat/test_pipeop_missind.R @@ -99,6 +99,7 @@ test_that("PipeOpMissInd", { # https://stackoverflow.com/questions/60512348/how-to-impute-data-with-mlr3-and-predict-with-na-values test_that("union with missing rows", { + skip_if_not_installed("rpart") data("mtcars", package = "datasets") data = mtcars[, 1:3] # Train task diff --git a/tests/testthat/test_pipeop_ovr.R b/tests/testthat/test_pipeop_ovr.R index 3dd9169d9..6a76eae65 100644 --- a/tests/testthat/test_pipeop_ovr.R +++ b/tests/testthat/test_pipeop_ovr.R @@ -49,6 +49,7 @@ test_that("PipeOpOVRUnite - basic properties", { }) test_that("PipeOpOVRUnite- train and predict", { + skip_if_not_installed("rpart") # toy tasks that are splitted, trained and predicted manually feature = rep(c(1, 0), c(10, 20)) dat1 = data.table(target = as.factor(rep(c("a", "rest"), c(10, 20))), feature = feature) @@ -94,6 +95,7 @@ test_that("PipeOpOVRUnite- train and predict", { context("PipeOpOVRSplit and PipeOpOVRUnite") test_that("PipeOpOVRSplit and PipeOpOVRUnite - train and predict", { + skip_if_not_installed("rpart") # same toy task but now we compare the results to the automated Graph's results feature = rep(c(1, 0), c(10, 20)) dat0 = data.table(target = as.factor(rep(c("a", "b", "c"), each = 10)), feature = feature) @@ -127,6 +129,7 @@ test_that("PipeOpOVRSplit and PipeOpOVRUnite - train and predict", { }) test_that("PipeOpOVRSplit and PipeOpOVRUnite - task size", { + 
skip_if_not_installed("rpart") gr = PipeOpOVRSplit$new() %>>% LearnerClassifRpart$new() %>>% PipeOpOVRUnite$new() gr$train(tsk("iris")$filter(c(1:30, 51:80, 101:130))) prd = gr$predict(tsk("iris")$filter(c(1:30, 51:80, 101:130)))[[1]] diff --git a/tests/testthat/test_pipeop_proxy.R b/tests/testthat/test_pipeop_proxy.R index 960fab124..bc50ae73b 100644 --- a/tests/testthat/test_pipeop_proxy.R +++ b/tests/testthat/test_pipeop_proxy.R @@ -94,6 +94,7 @@ Training debug2 with input list\\(input_1 = 2\\)$") Predicting debug2 with input list\\(input_1 = 4\\) and state list\\(input_1 = 2\\)$") + skip_if_not_installed("fastICA") # --------- # NOP | feature union | NOP; feature union has vararg @@ -119,6 +120,7 @@ Training debug2 with input list\\(input_1 = 2\\)$") test_that("Cloning as expected", { + skip_if_not_installed("rpart") gr = PipeOpProxy$new() %>>% LearnerClassifRpart$new() diff --git a/tests/testthat/test_pipeop_randomresponse.R b/tests/testthat/test_pipeop_randomresponse.R index 6284be65d..486340879 100644 --- a/tests/testthat/test_pipeop_randomresponse.R +++ b/tests/testthat/test_pipeop_randomresponse.R @@ -13,6 +13,8 @@ test_that("basic properties", { test_that("train and predict", { + skip_if_not_installed("rpart") + skip_if_not_installed("rpart") task1 = mlr_tasks$get("iris") task1$row_roles$use = c(1:10, 140:150) g1 = LearnerClassifRpart$new() %>>% PipeOpRandomResponse$new() diff --git a/tests/testthat/test_pipeop_smote.R b/tests/testthat/test_pipeop_smote.R index 0f6181f8e..9e2e1b7ee 100644 --- a/tests/testthat/test_pipeop_smote.R +++ b/tests/testthat/test_pipeop_smote.R @@ -1,6 +1,7 @@ context("PipeOpSmote") test_that("PipeOpSmote - basic properties", { + skip_if_not_installed("smotefamily") set.seed(1234) data = smotefamily::sample_generator(1000, ratio = 0.80) data$result = as.factor(data$result) @@ -18,6 +19,7 @@ test_that("PipeOpSmote - basic properties", { }) test_that("compare to smotefamily::SMOT", { + skip_if_not_installed("smotefamily") 
set.seed(1234) data = smotefamily::sample_generator(1000, ratio = 0.80) data$result = as.factor(data$result) diff --git a/tests/testthat/test_pipeop_targetmutate.R b/tests/testthat/test_pipeop_targetmutate.R index 940fa2212..1d06fd01d 100644 --- a/tests/testthat/test_pipeop_targetmutate.R +++ b/tests/testthat/test_pipeop_targetmutate.R @@ -1,6 +1,7 @@ context("PipeOpTargetMutate") test_that("PipeOpTargetMutate - basic properties", { + skip_if_not_installed("rpart") expect_pipeop_class(PipeOpTargetMutate, list(id = "po")) po = PipeOpTargetMutate$new("po") @@ -38,6 +39,7 @@ test_that("PipeOpTargetMutate - basic properties", { }) test_that("PipeOpTargetMutate - log base 2 trafo", { + skip_if_not_installed("rpart") g = Graph$new() g$add_pipeop(PipeOpTargetMutate$new("logtrafo", param_vals = list( diff --git a/tests/testthat/test_pipeop_targettrafoscalerange.R b/tests/testthat/test_pipeop_targettrafoscalerange.R index 9ae209993..d66f34d31 100644 --- a/tests/testthat/test_pipeop_targettrafoscalerange.R +++ b/tests/testthat/test_pipeop_targettrafoscalerange.R @@ -1,6 +1,7 @@ context("PipeOpTargetTrafoScaleRange") test_that("PipeOpTargetTrafoScaleRange - basic properties", { + skip_if_not_installed("rpart") expect_pipeop_class(PipeOpTargetTrafoScaleRange, list(id = "po")) po = PipeOpTargetTrafoScaleRange$new() @@ -48,6 +49,7 @@ test_that("PipeOpTargetTrafoScaleRange - basic properties", { }) test_that("PipeOpTargetTrafoScaleRange - row use subsets", { + skip_if_not_installed("rpart") po = PipeOpTargetTrafoScaleRange$new() task = mlr_tasks$get("boston_housing_classic") diff --git a/tests/testthat/test_pipeop_textvectorizer.R b/tests/testthat/test_pipeop_textvectorizer.R index d916ad29c..4f13fa4b3 100644 --- a/tests/testthat/test_pipeop_textvectorizer.R +++ b/tests/testthat/test_pipeop_textvectorizer.R @@ -155,6 +155,7 @@ test_that("PipeOpTextVectorizer - integer sequence", { skip_if_not_installed("quanteda") suppressWarnings(loadNamespace("quanteda")) # TODO: see 
https://github.com/quanteda/quanteda/issues/2116 , may not be an issue in the future + set.seed(1) task = mlr_tasks$get("iris") # create hacky text data: dt = data.table("txt" = apply(iris, 1, function(x) { diff --git a/tests/testthat/test_pipeop_threshold.R b/tests/testthat/test_pipeop_threshold.R index 6b2e2e5f8..18a334518 100644 --- a/tests/testthat/test_pipeop_threshold.R +++ b/tests/testthat/test_pipeop_threshold.R @@ -14,6 +14,7 @@ test_that("threshold general", { test_that("thresholding works for binary", { + skip_if_not_installed("rpart") po_lrn = po(lrn("classif.rpart", predict_type = "prob")) # binary @@ -63,6 +64,7 @@ test_that("thresholding works for binary", { test_that("thresholding works for multiclass", { + skip_if_not_installed("rpart") po_lrn = po(lrn("classif.rpart", predict_type = "prob")) # multiclass diff --git a/tests/testthat/test_pipeop_tunethreshold.R b/tests/testthat/test_pipeop_tunethreshold.R index 4e22728af..09f9159ae 100644 --- a/tests/testthat/test_pipeop_tunethreshold.R +++ b/tests/testthat/test_pipeop_tunethreshold.R @@ -1,6 +1,7 @@ context("tunethreshold") test_that("threshold works for multiclass", { + skip_if_not_installed("rpart") t = tsk("iris") po_cv = po("learner_cv", learner = lrn("classif.rpart", predict_type = "prob")) res = po_cv$train(list(t)) @@ -17,6 +18,7 @@ test_that("threshold works for multiclass", { }) test_that("threshold works for binary", { + skip_if_not_installed("rpart") t = tsk("pima") po_cv = po("learner_cv", learner = lrn("classif.rpart", predict_type = "prob")) res = po_cv$train(list(t)) @@ -36,6 +38,7 @@ test_that("threshold works for binary", { }) test_that("tunethreshold graph works", { + skip_if_not_installed("rpart") graph = po("learner_cv", lrn("classif.rpart", predict_type = "prob")) %>>% po("tunethreshold") diff --git a/tests/testthat/test_pipeop_updatetarget.R b/tests/testthat/test_pipeop_updatetarget.R index 484c78dc5..67f1425e4 100644 --- a/tests/testthat/test_pipeop_updatetarget.R +++ 
b/tests/testthat/test_pipeop_updatetarget.R @@ -85,6 +85,7 @@ test_that("rename target", { }) test_that("update resample and predict_newdata", { + skip_if_not_installed("rpart") skip_on_cran() t = tsk("wine") pom = PipeOpUpdateTarget$new(param_vals = list(new_target_name = "type", new_task_type = "classif")) diff --git a/tests/testthat/test_pipeop_yeojohnson.R b/tests/testthat/test_pipeop_yeojohnson.R index 7fb2c29ac..78f79d23d 100644 --- a/tests/testthat/test_pipeop_yeojohnson.R +++ b/tests/testthat/test_pipeop_yeojohnson.R @@ -1,6 +1,7 @@ context("PipeOpYeoJohnson") test_that("PipeOpYeoJohnson - general functionality", { + skip_if_not_installed("bestNormalize") task = mlr_tasks$get("iris") op = PipeOpYeoJohnson$new() expect_pipeop(op) @@ -12,6 +13,7 @@ test_that("PipeOpYeoJohnson - general functionality", { }) test_that("PipeOpYeoJohnson - receive expected result", { + skip_if_not_installed("bestNormalize") task = mlr_tasks$get("iris") op = PipeOpYeoJohnson$new(param_vals = list(standardize = FALSE)) result = train_pipeop(op, inputs = list(task)) diff --git a/tests/testthat/test_po.R b/tests/testthat/test_po.R index 3438330d3..f52ea538a 100644 --- a/tests/testthat/test_po.R +++ b/tests/testthat/test_po.R @@ -1,6 +1,7 @@ context("mlr_pipeops") test_that("mlr_pipeops access works", { + skip_if_not_installed("rpart") expect_equal(po(), mlr_pipeops) @@ -204,6 +205,7 @@ test_that("mlr_pipeops multi-access works", { }) test_that("Incrementing ids works", { + skip_if_not_installed("rpart") x = po("pca_123") expect_true(x$id == "pca_123") expect_r6(x, "PipeOpPCA") diff --git a/tests/testthat/test_ppl.R b/tests/testthat/test_ppl.R index 9c88f868b..700eff726 100644 --- a/tests/testthat/test_ppl.R +++ b/tests/testthat/test_ppl.R @@ -59,6 +59,7 @@ test_that("mlr_pipeops multi-access works", { }) test_that("mlr3book authors don't sleepwalk through life", { + skip_if_not_installed("rpart") tasks = tsks(c("breast_cancer", "sonar")) diff --git a/tests/testthat/test_resample.R 
b/tests/testthat/test_resample.R index ea8763385..bf165d298 100644 --- a/tests/testthat/test_resample.R +++ b/tests/testthat/test_resample.R @@ -1,6 +1,7 @@ context("resample") test_that("PipeOp - Resample", { + skip_if_not_installed("rpart") task = mlr_tasks$get("iris") op1 = PipeOpScale$new() lrn = mlr_learners$get("classif.rpart") diff --git a/tests/testthat/test_typecheck.R b/tests/testthat/test_typecheck.R index fdd99966e..bb496c57a 100644 --- a/tests/testthat/test_typecheck.R +++ b/tests/testthat/test_typecheck.R @@ -2,6 +2,7 @@ context("Typecheck") test_that("utility function works", { + skip_if_not_installed("rpart") expect_equal(get_r6_inheritance("data.table"), NULL) expect_equal(get_r6_inheritance("PipeOp"), "PipeOp") @@ -182,6 +183,7 @@ test_that("Autoconversion for pipeops works", { test_that("R6 inheritance inference works with packages that are not loaded", { + skip_if_not_installed("ranger") skip_if_not_installed("mlr3learners") skip_on_cran() library("mlr3learners") diff --git a/tests/testthat/test_usecases.R b/tests/testthat/test_usecases.R index 40117175b..aafb0ea65 100644 --- a/tests/testthat/test_usecases.R +++ b/tests/testthat/test_usecases.R @@ -19,6 +19,8 @@ test_graph = function(g, n_nodes, n_edges) { } test_that("linear: scale + pca + learn", { + skip_if_not_installed("rpart") + skip_if_not_installed("rpart") g = PipeOpScale$new() %>>% PipeOpPCA$new() %>>% PipeOpLrnRP z = test_graph(g, n_nodes = 3L, n_edges = 2L) @@ -44,6 +46,7 @@ test_that("linear: scale + pca + learn", { }) test_that("featureunion", { + skip_if_not_installed("rpart") g = gunion(list(PipeOpPCA$new(), PipeOpNOP$new())) %>>% PipeOpFeatureUnion$new(2L) %>>% PipeOpLrnRP z = test_graph(g, n_nodes = 4L, n_edges = 3L) @@ -60,6 +63,8 @@ test_that("featureunion", { # FIXME: have a look at intermediate results in all usecase, we should expect some stuff there test_that("bagging", { + skip_if_not_installed("rpart") + skip_if_not_installed("rpart") g = 
pipeline_greplicate(PipeOpSubsample$new() %>>% PipeOpLrnRP, 2L) %>>% PipeOpClassifAvg$new(innum = 2L) g$pipeops$subsample_1$param_set$values$frac = .5 g$pipeops$subsample_2$param_set$values$frac = .5 @@ -72,6 +77,8 @@ test_that("bagging", { test_that("branching", { + skip_if_not_installed("rpart") + skip_if_not_installed("rpart") # FIXME: are we REALLY sure that stuff here gets connected in the correct order? # i doubt that and this looks really bad and errorprone # b) we really want to have an associated order in the graph which is determined by @@ -100,6 +107,8 @@ test_that("branching", { }) test_that("branching with varargs", { + skip_if_not_installed("rpart") + skip_if_not_installed("rpart") g = PipeOpBranch$new(2L) %>>% gunion(list(PipeOpLrnRP, PipeOpLrnFL)) %>>% PipeOpUnbranch$new() z = test_graph(g, n_nodes = 4L, n_edges = 4L) @@ -123,12 +132,14 @@ test_that("branching with varargs", { test_that("task chunking", { + skip_if_not_installed("rpart") g = PipeOpChunk$new(2L) %>>% pipeline_greplicate(PipeOpLrnRP, 2L) %>>% PipeOpClassifAvg$new(2L) z = test_graph(g, n_nodes = 4L, n_edges = 4L) }) test_that("stacking", { + skip_if_not_installed("rpart") task = mlr_tasks$get("iris") lrn1 = mlr_learners$get("classif.rpart")