From 20f50d53d81648817e20bba801e63900991c6db1 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 17 Aug 2024 16:38:28 +0200 Subject: [PATCH] Make as.data.table(mlr_pipeops) more robust against construcor errors Closes #483 --- R/mlr_pipeops.R | 38 +++++++++++++++++++++++++------- tests/testthat/test_dictionary.R | 28 +++++++++++++++++++++++ 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/R/mlr_pipeops.R b/R/mlr_pipeops.R index 7806920f8..c40ab4f9c 100644 --- a/R/mlr_pipeops.R +++ b/R/mlr_pipeops.R @@ -70,35 +70,50 @@ mlr_pipeops = R6Class("DictionaryPipeOp", inherit = mlr3misc::Dictionary, #' @export as.data.table.DictionaryPipeOp = function(x, ...) { - setkeyv(map_dtr(x$keys(), function(key) { + result = setkeyv(map_dtr(x$keys(), function(key) { metainf = x$metainf[[key]] if (!is.null(metainf)) { metainfval = eval(metainf, envir = topenv()) meta_one = lapply(metainfval, function(x) if (identical(x, "N")) 1 else x) meta_two = lapply(metainfval, function(x) if (identical(x, "N")) 2 else x) - l1 = do.call(x$get, c(list(key), meta_one)) - l2 = do.call(x$get, c(list(key), meta_two)) + l1 = tryCatch(do.call(x$get, c(list(key), meta_one)), error = function(e) ".__error__") + l2 = tryCatch(do.call(x$get, c(list(key), meta_two)), error = function(e) ".__error__") } else { - l1 = l2 = x$get(key) + l1 = l2 = tryCatch(x$get(key), error = function(e) ".__error__") + } + if (identical(l1, ".__error__") || identical(l2, ".__error__")) { + return(list( + key = key, + label = NA_character_, + packages = list(NA_character_), + tags = list(NA_character_), + feature_types = list(NA_character_), + input.num = NA_integer_, + output.num = NA_integer_, + input.type.train = list(NA_character_), + input.type.predict = list(NA_character_), + output.type.train = list(NA_character_), + output.type.predict = list(NA_character_) + )) } if (nrow(l1$input) == nrow(l2$input) && "..." %nin% l1$input$name) { innum = nrow(l1$input) } else { - innum = NA + innum = NA_integer_ } if (nrow(l1$output) == nrow(l2$output)) { outnum = nrow(l1$output) } else { - outnum = NA + outnum = NA_integer_ } - if (exists("feature_types", envir = l1)) ft = list(l1$feature_types) else ft = NA + if (exists("feature_types", envir = l1)) ft = l1$feature_types else ft = NA_character_ list( key = key, label = l1$label, packages = list(l1$packages), tags = list(l1$tags), - feature_types = ft, + feature_types = list(ft), input.num = innum, output.num = outnum, input.type.train = list(l1$input$train), @@ -107,4 +122,11 @@ as.data.table.DictionaryPipeOp = function(x, ...) { output.type.predict = list(l1$output$predict) ) }), "key")[] + + # I don't trust 'label' to never be NA, but 'packages' is always a `character` (even if often an empty one). + missings = result$key[map_lgl(result$packages, function(x) any(is.na(x)))] + if (length(missings)) { + warningf("The following PipeOps could not be constructed, likely due to missing packages: %s\nTheir corresponding information is incomplete.", paste(missings, collapse = ", ")) + } + result } diff --git a/tests/testthat/test_dictionary.R b/tests/testthat/test_dictionary.R index a35a902fa..1682e990a 100644 --- a/tests/testthat/test_dictionary.R +++ b/tests/testthat/test_dictionary.R @@ -247,3 +247,31 @@ test_that("Cannot add pipeops with keys that invalidates the convenience for id copy = mlr_pipeops$clone(deep = TRUE) expect_error(copy$add("name_1", PipeOp), regexp = "grepl") }) + +test_that("as.data.table(mlr_pipeops) works when a pipeop can not be constructed", { + PipeOpError = R6Class("PipeOpError", inherit = PipeOp, public = list( + initialize = function() { + stop("This PipeOp can not be constructed") + } + )) + dt_before = as.data.table(mlr_pipeops) + + # No NAs in the data.table of mlr3pipelines' own PipeOps + expect_false(any(is.na(unlist( + dt_before[, c("key", "label", "packages", "tags", "input.type.train", "input.type.predict", + "output.type.train", "output.type.predict"), with = FALSE]) + ))) + + mlr_pipeops$add("error_pipeop", PipeOpError) + expect_warning({dt_after = as.data.table(mlr_pipeops)}, "could not be constructed.*error_pipeop") + mlr_pipeops$remove("error_pipeop") + + expect_equal(dt_after[key == "error_pipeop"], + # use weird data.table constructor since we can't use 'key' in the data.table() call + setkeyv(data.table()[, `:=`(key = "error_pipeop", label = NA_character_, packages = list(NA_character_), + tags = list(NA_character_), feature_types = list(NA_character_), + input.num = NA_integer_, output.num = NA_integer_, + input.type.train = list(NA_character_), input.type.predict = list(NA_character_), + output.type.train = list(NA_character_), output.type.predict = list(NA_character_))][], "key")) + +})