Update pred_toZ()

- #102 add import, return, and see also roxygen doc tags, replace note with details tag, rename fn doc title - #116 update argument checks conditional expression - #118 match output to `log_transform_yi()` (now returns additional cols `lower` and `upper`, not only `c("Z","VZ")`) - #118 match process to `log_transform_yi()` and #97 generalise processing to both euc/bt datasets without hard-coding dataset names in fns, and remove associated dataset-specific argument checking #116
egouldo · Aug 15, 2024 · a9a3f61 · a9a3f61
1 parent 89947c1
commit a9a3f61
Showing 1 changed file with 48 additions and 62 deletions.
diff --git a/R/pred_to_Z.R b/R/pred_to_Z.R
@@ -1,74 +1,60 @@
-#' Standardize Out-Of-Sample Predictions
+#' Z-standardise a dataframe of back-transformed Out-Of-Sample Predictions
 #'
 #' @description Standardizes out-of-sample predictions by computing the Fisher's Z transformed Correlation Coefficient from analysts' out-of-sample prediction estimates and corresponding standard error.
-#' @note `pred_to_Z` expects estimates to be on the response scale, not the link scale.
 #' @param back_transformed_data a dataframe or tibble with the columns "estimate" and "se.fit", containing yi and SE\(yi\) values respectively
 #' @param response_variable_name a character vector
+#' @return A tibble of standardised-out-of-sample predictions on the Z-scale, with columns `Z`, `VZ`, `lower` and `upper`, and the original columns fro `back_transformed_data` that were not used / updated in the transformation.
+#' @details This function is used to standardize out-of-sample predictions on the response scale to the Z-scale. [pred_to_Z()] expects estimates to be on the response scale, not the link scale.
+#' 
+#' The function computes the Z-score and VZ-score for each out-of-sample prediction estimate and its corresponding standard error using [Z_VZ_preds()].
 #' @export
+#' @import dplyr
+#' @import purrr
+#' @import cli
+#' @import rlang
+#' @seealso Equivalent to[log_transform_yi()] in terms of workflow data hierarchy.
 pred_to_Z <- function(back_transformed_data,
-                      params,
-                      dataset) {
-  # TODO test: str_detect(response_variable_name) in param table
-  match.arg(dataset, choices = c("blue tit", "eucalyptus"), several.ok = FALSE)
-  if (rlang::is_na(params) | rlang::is_na(back_transformed_data)) {
+                      params) {
+
+  if (any(rlang::is_na(params), 
+          rlang::is_na(back_transformed_data))) {
     cli::cli_warn("Argument {.arg params} or {.arg back_transformed_data} is {.val {NA}}. Returning {.val {NA}} for standardized predictions.")
     return(NA)
   }
-
-  if (dataset == "blue tit") {
-    if (!pointblank::test_col_exists(back_transformed_data,
-      columns = c("estimate", "se.fit")
-    )) {
-      cli::cli_warn("Blue tit Dataframe {.arg back_transformed_data} is missing columns {.val estimate} and/or {.val se.fit}. Returning {.val {NA}} for standardized predictions.")
-      return(NA)
-    }
-    sd_p <- params %>%
-      dplyr::filter(parameter == "sd") %>%
-      purrr::pluck("value")
-    mu_p <- params %>%
-      dplyr::filter(parameter == "mean") %>%
-      purrr::pluck("value")
-
+
+  sd_p <- params %>%
+    dplyr::filter(parameter == "sd") %>%
+    purrr::pluck("value")
+
+  mu_p <- params %>%
+    dplyr::filter(parameter == "mean") %>%
+    purrr::pluck("value")
+
+  names_lookup <- c(yi = "estimate", #blue tit
+                    yi = "fit", #eucalyptus
+                    yi_se = "se.fit") # both datasets
+
+  standardised_preds <- 
+    back_transformed_data %>%
+    rename(any_of(names_lookup)) %>% 
+
     standardised_preds <-
-      back_transformed_data %>%
-      mutate(
-        res = map2(
-          estimate,
-          se.fit,
-          ~ Z_VZ_preds(
-            yi = .x,
-            yi_se = .y,
-            sd_p = sd_p,
-            mu_p = mu_p
-          )
-        ),
-        .keep = c("unused")
-      ) %>%
-      hoist(res, "Z", "VZ") %>%
-      select(-starts_with("ci."))
-  } else {
-    if (!pointblank::test_col_exists(back_transformed_data,
-      columns = c("fit", "se.fit")
-    )) {
-      cli::cli_warn("Eucalyptus Dataframe {.arg back_transformed_data} is missing columns {.val fit} and/or {.val se.fit}. Returning {.val {NA}} for standardized predictions.") # TODO remove hard-coding, generalise
-      return(NA)
-    }
-    sd_p <- params %>%
-      filter(parameter == "sd") %>%
-      pluck("value")
-    mu_p <- params %>%
-      filter(parameter == "mean") %>%
-      pluck("value")
-
-    standardised_preds <-
-      back_transformed_data %>%
-      mutate(
-        res = map2(fit, se.fit, ~ Z_VZ_preds(.x, .y, sd_p, mu_p)),
-        .keep = c("unused")
-      ) %>%
-      hoist(res, "Z", "VZ") %>%
-      select(-starts_with("ci."))
-  }
-
+    back_transformed_data %>%
+    mutate(
+      res = map2(
+        estimate,
+        se.fit,
+        ~ Z_VZ_preds(
+          yi = .x,
+          yi_se = .y,
+          sd_p = sd_p,
+          mu_p = mu_p
+        )
+      ),
+      .keep = c("unused")
+    ) %>%
+    select(-starts_with("ci.")) %>% 
+    unnest(res)
+
   return(standardised_preds)
 }