-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- #102 add import, return, and see also roxygen doc tags, replace note with details tag, rename fn doc title - #116 update argument checks conditional expression - #118 match output to `log_transform_yi()` (now returns additional cols `lower` and `upper`, not only `c("Z","VZ")`) - #118 match process to `log_transform_yi()` and #97 generalise processing to both euc/bt datasets without hard-coding dataset names in fns, and remove associated dataset-specific argument checking #116
- Loading branch information
Showing
1 changed file
with
48 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,74 +1,60 @@ | ||
#' Standardize Out-Of-Sample Predictions | ||
#' Z-standardise a dataframe of back-transformed Out-Of-Sample Predictions | ||
#' | ||
#' @description Standardizes out-of-sample predictions by computing the Fisher's Z transformed Correlation Coefficient from analysts' out-of-sample prediction estimates and corresponding standard error. | ||
#' @note `pred_to_Z` expects estimates to be on the response scale, not the link scale. | ||
#' @param back_transformed_data a dataframe or tibble with the columns "estimate" and "se.fit", containing yi and SE\(yi\) values respectively | ||
#' @param response_variable_name a character vector | ||
#' @return A tibble of standardised-out-of-sample predictions on the Z-scale, with columns `Z`, `VZ`, `lower` and `upper`, and the original columns fro `back_transformed_data` that were not used / updated in the transformation. | ||
#' @details This function is used to standardize out-of-sample predictions on the response scale to the Z-scale. [pred_to_Z()] expects estimates to be on the response scale, not the link scale. | ||
#' | ||
#' The function computes the Z-score and VZ-score for each out-of-sample prediction estimate and its corresponding standard error using [Z_VZ_preds()]. | ||
#' @export | ||
#' @import dplyr | ||
#' @import purrr | ||
#' @import cli | ||
#' @import rlang | ||
#' @seealso Equivalent to[log_transform_yi()] in terms of workflow data hierarchy. | ||
pred_to_Z <- function(back_transformed_data, | ||
params, | ||
dataset) { | ||
# TODO test: str_detect(response_variable_name) in param table | ||
match.arg(dataset, choices = c("blue tit", "eucalyptus"), several.ok = FALSE) | ||
if (rlang::is_na(params) | rlang::is_na(back_transformed_data)) { | ||
params) { | ||
|
||
if (any(rlang::is_na(params), | ||
rlang::is_na(back_transformed_data))) { | ||
cli::cli_warn("Argument {.arg params} or {.arg back_transformed_data} is {.val {NA}}. Returning {.val {NA}} for standardized predictions.") | ||
return(NA) | ||
} | ||
|
||
if (dataset == "blue tit") { | ||
if (!pointblank::test_col_exists(back_transformed_data, | ||
columns = c("estimate", "se.fit") | ||
)) { | ||
cli::cli_warn("Blue tit Dataframe {.arg back_transformed_data} is missing columns {.val estimate} and/or {.val se.fit}. Returning {.val {NA}} for standardized predictions.") | ||
return(NA) | ||
} | ||
sd_p <- params %>% | ||
dplyr::filter(parameter == "sd") %>% | ||
purrr::pluck("value") | ||
mu_p <- params %>% | ||
dplyr::filter(parameter == "mean") %>% | ||
purrr::pluck("value") | ||
|
||
|
||
sd_p <- params %>% | ||
dplyr::filter(parameter == "sd") %>% | ||
purrr::pluck("value") | ||
|
||
mu_p <- params %>% | ||
dplyr::filter(parameter == "mean") %>% | ||
purrr::pluck("value") | ||
|
||
names_lookup <- c(yi = "estimate", #blue tit | ||
yi = "fit", #eucalyptus | ||
yi_se = "se.fit") # both datasets | ||
|
||
standardised_preds <- | ||
back_transformed_data %>% | ||
rename(any_of(names_lookup)) %>% | ||
|
||
standardised_preds <- | ||
back_transformed_data %>% | ||
mutate( | ||
res = map2( | ||
estimate, | ||
se.fit, | ||
~ Z_VZ_preds( | ||
yi = .x, | ||
yi_se = .y, | ||
sd_p = sd_p, | ||
mu_p = mu_p | ||
) | ||
), | ||
.keep = c("unused") | ||
) %>% | ||
hoist(res, "Z", "VZ") %>% | ||
select(-starts_with("ci.")) | ||
} else { | ||
if (!pointblank::test_col_exists(back_transformed_data, | ||
columns = c("fit", "se.fit") | ||
)) { | ||
cli::cli_warn("Eucalyptus Dataframe {.arg back_transformed_data} is missing columns {.val fit} and/or {.val se.fit}. Returning {.val {NA}} for standardized predictions.") # TODO remove hard-coding, generalise | ||
return(NA) | ||
} | ||
sd_p <- params %>% | ||
filter(parameter == "sd") %>% | ||
pluck("value") | ||
mu_p <- params %>% | ||
filter(parameter == "mean") %>% | ||
pluck("value") | ||
|
||
standardised_preds <- | ||
back_transformed_data %>% | ||
mutate( | ||
res = map2(fit, se.fit, ~ Z_VZ_preds(.x, .y, sd_p, mu_p)), | ||
.keep = c("unused") | ||
) %>% | ||
hoist(res, "Z", "VZ") %>% | ||
select(-starts_with("ci.")) | ||
} | ||
|
||
back_transformed_data %>% | ||
mutate( | ||
res = map2( | ||
estimate, | ||
se.fit, | ||
~ Z_VZ_preds( | ||
yi = .x, | ||
yi_se = .y, | ||
sd_p = sd_p, | ||
mu_p = mu_p | ||
) | ||
), | ||
.keep = c("unused") | ||
) %>% | ||
select(-starts_with("ci.")) %>% | ||
unnest(res) | ||
|
||
return(standardised_preds) | ||
} |