hubverse-org · zkamvar · Jan 15, 2025 · Jan 13, 2025 · Jan 13, 2025 · Jan 13, 2025
diff --git a/R/check_tbl_value_col_ascending.R b/R/check_tbl_value_col_ascending.R
@@ -8,15 +8,15 @@
 #' If not, the check is skipped and a `<message/check_info>` condition class
 #' object is returned.
 #'
-#' @inherit check_tbl_value_col params
+#' @inherit check_tbl_values params
 #' @inherit check_tbl_col_types return
 #' @export
 check_tbl_value_col_ascending <- function(tbl, file_path, hub_path, round_id,
                                           derived_task_ids = get_hub_derived_task_ids(hub_path)) {
+  check_output_types <- intersect(c("cdf", "quantile"), unique(tbl[["output_type"]]))
 
   # Exit early if there are no values to check
-  no_values_to_check <- all(!c("cdf", "quantile") %in% tbl[["output_type"]])
-  if (no_values_to_check) {
+  if (length(check_output_types) == 0L) {
     return(
       capture_check_info(
         file_path,
@@ -27,28 +27,25 @@ check_tbl_value_col_ascending <- function(tbl, file_path, hub_path, round_id,
   }
 
   config_tasks <- hubUtils::read_config(hub_path, "tasks")
-  not_value <- names(tbl) != "value"
-  tbl[not_value] <- hubData::coerce_to_character(tbl[not_value])
+
   if (!is.null(derived_task_ids)) {
     tbl[derived_task_ids] <- NA_character_
   }
-  round_output_types <- get_round_output_type_names(config_tasks, round_id)
-  only_cdf_or_quantile <- intersect(c("cdf", "quantile"), round_output_types)
-  # FIX for <https://github.com/hubverse-org/hubValidations/issues/78>
-  # This function uses an inner join to auto-sort the table by model task,
-  # splitting by output type. We can use that to loop through the check.
-  output_type_tbls <- match_tbl_to_model_task(
-    tbl,
-    config_tasks = config_tasks,
-    round_id = round_id,
-    output_types = only_cdf_or_quantile,
-    derived_task_ids = derived_task_ids
+
+  # Check that values are non-decreasing for each output type separately to reduce
+  # memory pressure
+  error_tbl <- purrr::map(
+    check_output_types,
+    \(.x) {
+      check_values_ascending_by_output_type(
+        .x, tbl,
+        config_tasks, round_id,
+        derived_task_ids
+      )
+    }
   ) %>%
-    purrr::compact()
-  error_tbl <- purrr::map(output_type_tbls, check_values_ascending) %>%
     purrr::list_rbind()
 
-
   check <- nrow(error_tbl) == 0L
 
   if (check) {
@@ -70,9 +67,34 @@ check_tbl_value_col_ascending <- function(tbl, file_path, hub_path, round_id,
   )
 }
 
+#' Check that values for each model task in specific output types are ascending
+#'
+#' This function allows us to map over individual output types one at a time to
+#' reduce memory pressure.
+#' @param output_type the output type(s) to check. Must be a character vector
+#' @noRd
+check_values_ascending_by_output_type <- function(output_type, tbl,
+                                                  config_tasks, round_id,
+                                                  derived_task_ids) {
+  # FIX for <https://github.com/hubverse-org/hubValidations/issues/78>
+  # This function uses an inner join to auto-sort the table by model task,
+  # splitting by output type. We can use that to loop through the check.
+  output_type_tbls <- match_tbl_to_model_task(
+    tbl,
+    config_tasks = config_tasks,
+    round_id = round_id,
+    output_types = output_type,
+    derived_task_ids = derived_task_ids
+  ) %>%
+    purrr::compact()
+
+  purrr::map(output_type_tbls, check_values_ascending) %>%
+    purrr::list_rbind()
+}
+
 #' Check that values for each model task are ascending
 #'
-#' @param tbl a table with a single output type
+#' @param tbl an all character table with a single output type
 #' @return
 #'  - If the check succeeds, and all values are non-decreasing: NULL
 #'  - If the check fails, a summary table showing the model tasks that

diff --git a/R/match_tbl_to_model_task.R b/R/match_tbl_to_model_task.R
@@ -36,7 +36,7 @@ match_tbl_to_model_task <- function(tbl, config_tasks, round_id,
     config_tasks,
     round_id = round_id,
     required_vals_only = FALSE,
-    all_character = TRUE,
+    all_character = all_character,
     as_arrow_table = FALSE,
     bind_model_tasks = FALSE,
     output_types = output_types,

diff --git a/R/validate_model_data.R b/R/validate_model_data.R
@@ -211,7 +211,7 @@ validate_model_data <- function(hub_path, file_path, round_id_col = NULL,
 
   checks$value_col_non_desc <- try_check(
     check_tbl_value_col_ascending(
-      tbl,
+      tbl_chr,
       file_path = file_path,
       hub_path = hub_path,
       round_id = round_id,

diff --git a/man/check_tbl_value_col_ascending.Rd b/man/check_tbl_value_col_ascending.Rd
diff --git a/tests/testthat/test-check_tbl_value_col_ascending.R b/tests/testthat/test-check_tbl_value_col_ascending.R
@@ -2,7 +2,7 @@ test_that("check_tbl_value_col_ascending works", {
   hub_path <- system.file("testhubs/simple", package = "hubValidations")
   file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
   file_meta <- parse_file_name(file_path)
-  tbl <- hubValidations::read_model_out_file(file_path, hub_path)
+  tbl <- read_model_out_file(file_path, hub_path, coerce_types = "chr")
 
   expect_snapshot(
     check_tbl_value_col_ascending(tbl, file_path, hub_path, file_meta$round_id)
@@ -12,7 +12,7 @@ test_that("check_tbl_value_col_ascending works", {
   file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet"
   file_meta <- parse_file_name(file_path)
 
-  tbl <- hubValidations::read_model_out_file(file_path, hub_path)
+  tbl <- read_model_out_file(file_path, hub_path, coerce_types = "chr")
 
   expect_snapshot(
     check_tbl_value_col_ascending(tbl, file_path, hub_path, file_meta$round_id)
@@ -22,7 +22,7 @@ test_that("check_tbl_value_col_ascending works", {
 test_that("check_tbl_value_col_ascending works when output type IDs not ordered", {
   hub_path <- test_path("testdata/hub-unordered/")
   file_path <- "ISI-NotOrdered/2024-01-10-ISI-NotOrdered.csv"
-  tbl <- read_model_out_file(file_path, hub_path)
+  tbl <- read_model_out_file(file_path, hub_path, coerce_types = "chr")
   file_meta <- parse_file_name(file_path)
   expect_snapshot(
     check_tbl_value_col_ascending(tbl, file_path, hub_path, file_meta$round_id)
@@ -33,7 +33,7 @@ test_that("check_tbl_value_col_ascending errors correctly", {
   hub_path <- system.file("testhubs/simple", package = "hubValidations")
   file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
   file_meta <- parse_file_name(file_path)
-  tbl <- hubValidations::read_model_out_file(file_path, hub_path)
+  tbl <- read_model_out_file(file_path, hub_path, coerce_types = "chr")
 
   tbl$value[c(1, 10)] <- 150
 
@@ -44,7 +44,7 @@ test_that("check_tbl_value_col_ascending errors correctly", {
   hub_path <- system.file("testhubs/flusight", package = "hubUtils")
   file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet"
   file_meta <- parse_file_name(file_path)
-  tbl <- hubValidations::read_model_out_file(file_path, hub_path)
+  tbl <- read_model_out_file(file_path, hub_path, coerce_types = "chr")
   tbl_error <- tbl
   # TODO: 2025-01-07 investigate the purpose of adding an invalid target, which
   # causes the test to fail
@@ -72,7 +72,7 @@ test_that("check_tbl_value_col_ascending skips correctly", {
   hub_path <- system.file("testhubs/simple", package = "hubValidations")
   file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
   file_meta <- parse_file_name(file_path)
-  tbl <- hubValidations::read_model_out_file(file_path, hub_path)
+  tbl <- read_model_out_file(file_path, hub_path, coerce_types = "chr")
   tbl <- tbl[tbl$output_type == "mean", ]
 
   expect_snapshot(
@@ -116,9 +116,10 @@ test_that("(#78) check_tbl_value_col_ascending will sort even if the data doesn'
   convert_to_cdf <- function(x) {
     ifelse(x == "quantile", "cdf", x)
   }
-  tbl <- hubValidations::read_model_out_file(file_path, hub_path) %>%
+  tbl <- read_model_out_file(file_path, hub_path) %>%
     dplyr::mutate(output_type_id = make_unsortable(.data[["output_type_id"]])) %>%
-    dplyr::mutate(output_type = convert_to_cdf(.data[["output_type"]]))
+    dplyr::mutate(output_type = convert_to_cdf(.data[["output_type"]])) %>%
+    hubData::coerce_to_character()
 
   # validating when it is sorted -----------------------------------------
   res <- check_tbl_value_col_ascending(tbl, file_path, hub_path, file_meta$round_id)
@@ -161,14 +162,28 @@ test_that("(#78) check_tbl_value_col_ascending will sort even if the data doesn'
   expect_equal(actual, expected, ignore_attr = TRUE)
 })
 
-
 test_that("(#78) check_tbl_value_col_ascending works when output type IDs differ by target", {
   hub_path <- test_path("testdata/hub-diff-otid-per-task/")
   file_path <- "ISI-NotOrdered/2024-01-10-ILI-model.csv"
-  tbl <- hubValidations::read_model_out_file(file_path, hub_path)
+  tbl <- read_model_out_file(file_path, hub_path, coerce_types = "chr")
   file_meta <- parse_file_name(file_path)
 
   res_ok <- check_tbl_value_col_ascending(tbl, file_path, hub_path, file_meta$round_id)
   expect_s3_class(res_ok, "check_success")
   expect_null(res_ok$error_tbl)
 })
+
+test_that("(#189) check_tbl_value_col_ascending ignores derived task IDs", {
+  hub_path <- test_path("testdata/hub-177")
+  file_path <- "FluSight-baseline/2024-12-14-FluSight-baseline.parquet"
+  tbl <- read_model_out_file(file_path, hub_path, coerce_types = "chr")
+  file_meta <- parse_file_name(file_path)
+
+  # Introduce invalid value to derived task id that should be ignored when using
+  # `derived_task_ids`.
+  tbl[1, "target_end_date"] <- "random_date"
+
+  res_ok <- check_tbl_value_col_ascending(tbl, file_path, hub_path, file_meta$round_id)
+  expect_s3_class(res_ok, "check_success")
+  expect_null(res_ok$error_tbl)
+})