Skip to content

Commit

Permalink
feat!: update downstream fn meta_analyse_datasets() post-#118 updates
Browse files Browse the repository at this point in the history
- rm commented out `poss_fit_*()` (now exported internally in 7dabe6d) #97
- replace name of data arg to `data` #97
- switch to rlang::is_null for arg checking #116 / conditional behaviour
- Add arg checking for `filter_vars`, `data`, and checking wither required columns, incl. those in `filter_vars` exist in `data` #116
- expose user-supplied arguments `outcom_variable` / `outcome_SE` and match values to arg conditions in new columns
- update supply of colnames arg to meta-analysis functions

docs!: update corresponding docs #102

- switch to `@importFrom` for `cli::`, add missing `@importFrom` calls
- Add `@details` on user-specified exclusion of outcome variable / outcome variable SE

style: spacing indentation, rm inline TODO comments #97
  • Loading branch information
egouldo committed Aug 28, 2024
1 parent d494c3f commit bdf5375
Showing 1 changed file with 158 additions and 81 deletions.
239 changes: 158 additions & 81 deletions R/meta_analyse_datasets.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#' Meta-analyses multiple datasets or subsets of datasets of analyst data
#'
#' @description Runs all meta-analyses and regression models for the ManyEcoEvo project analysis, including:
#' - Fitting univariate / fixed-effects meta-analysis
#' - Calculating the deviation of every effect size / point-estimate from the meta-analytic mean for all data subsetes
Expand All @@ -8,108 +9,182 @@
#' - A univariate GLM regression of the transformed deviation scores on the categorical peer-review ratings
#' - A univariate GLM regression of the transformed deviation scores on a binary variable corresponding to whether the analysis was a mixed-effects model (i.e. GLM with random-effects) or not.
#' - To be implemented: a multivariate regression #TODO
#' - The deviation scores on transformed categorical ratings but with no intercept (for nice plotting / ecploration).
#' - The deviation scores on transformed categorical ratings but with no intercept (for nice plotting / exploration).
#'
#' @param MA_data A nested-dataframe grouped by `dataset` and / or `exclusion_set`, `estimate_type`, containing the list-column of prepared analyst subset data `effects_analysis` ready for meta-analysis.
#' @param filter_vars A list of expressions to filter the `MA_data` dataframe by. E.g. `rlang::exprs(exclusion_set == "complete", expertise_subset == "All", publishable_subset == "All", collinearity_subset == "All")`
#'
#' @return A nested dataframe with all columns of object parsed to arg `MA_data`, but with additional columns for the results of each analysis: `MA_mod`, `sorensen_glm`, `box_cox_ratings_cont`, `box_cox_ratings_cat`, `box_cox_rating_cat_no_int`, `uni_mixed_effects`
#' @param data A nested- dataframe grouped by `dataset` and / or `exclusion_set`, `estimate_type`, containing the list-column of prepared analyst subset data `effects_analysis` ready for meta-analysis.
#' @param filter_vars A list of expressions to filter the `data` dataframe by. E.g. `rlang::exprs(exclusion_set == "complete", expertise_subset == "All", publishable_subset == "All", collinearity_subset == "All")`
#' #' @param outcome_variable A named list containing either/and a list of `dataset`s and their corresponding outcome variables for each value of `dataset`, a list of `estimate_type`s and their corresponding outcome variables for each value of `estimate_type`.
#' @return A nested dataframe with all columns of object parsed to arg `data`, but with additional columns for the results of each analysis: `MA_mod`, `sorensen_glm`, `box_cox_ratings_cont`, `box_cox_ratings_cat`, `box_cox_rating_cat_no_int`, `uni_mixed_effects`
#' @export
#' @importFrom purrr map_chr map2 map possibly pmap
#' @importFrom stringr str_detect
#' @importFrom purrr map_chr map2 map possibly pmap list_c map_lgl
#' @import dplyr
#' @import cli
#' @importFrom rlang na_chr is_null na_chr
#' @importFrom cli cli_h1 cli_abort
#' @importFrom rlang na_chr is_null na_chr is_list is_call exprs f_lhs as_string
#' @importFrom pointblank expect_col_exists
#' @importFrom tidyr unnest drop_na
#' @family Multi-dataset Wrapper Functions
#'
#' @examples
#' filter_vars <- rlang::exprs(
#' exclusion_set == "complete",
#' expertise_subset == "All",
#' publishable_subset == "All",
#' collinearity_subset == "All"
#' )
meta_analyse_datasets <- function(MA_data, filter_vars = NULL) {
# poss_fit_metafor_mv <- purrr::possibly(fit_metafor_mv,
# otherwise = NA,
# quiet = FALSE)
#
cli::cli_h1(text = "Meta-analysing Datasets")

if (any(str_detect(unique(MA_data$estimate_type), pattern = "Zr"))) {
# Must group by cols else multiple "effects_analysis" elements
# get passed to fit_MA_mv()
MA_data <- MA_data %>%
group_by(estimate_type, dataset, exclusion_set, publishable_subset, expertise_subset, collinearity_subset)
#' @details
#'
#' When `filter_vars` are supplied the function will filter the `data` dataframe by the expressions in the list, any data subsets excluded by filtering will not have multivariate met-analysis models fitted with [fit_multivar_MA()].
#'
#' When the arguments `outcome_variable` and/or `outcome_variable` are not supplied, the function defaults to:
#'
#' - using `"Zr"` as the standardised effect size and `"VZr"` as the standard error when `estimate_type` is `"Zr"`.
#' - using `"Z"` as the standardised out-of-sample estimate and `"VZ" `as the standardised out-of-sample estimate error when `estimate_type` is one of `c("yi", "y25", "y50", "y75")`.
#'
#' The function will check if the `data` dataframe contains the required columns for meta-analysis, including any variable names specified in calls to the `filter_vars` argument. If the required functions do not exist then the function will stop with an error.
#'
#' Function assumes that if argument `outcome_variable` is supplied, then `outcome_SE` is also supplied, and conversely, if `outcome_SE` is not supplied, then neither is `outcome_variable` (*TODO* not yet checked in function).
#'
meta_analyse_datasets <- function(data, outcome_variable = NULL, outcome_SE, filter_vars = NULL) {

# ----- Argument Checks -----
stopifnot(
is.data.frame(data),
rlang::is_null(filter_vars) | rlang::is_list(filter_vars)
)

required_columns <- c("effects_analysis",
"estimate_type",
"dataset")

if (rlang::is_list(filter_vars)) {
if (!all(map_lgl(filter_vars, rlang::is_call))) {
cli_abort("{.arg filter_vars} must be a list of calls")
} else {
required_columns <- filter_vars %>%
map(rlang::f_lhs) %>%
map(rlang::as_string) %>%
list_c() %>%
append(values = required_columns) %>%
unique()
}
}

pointblank::expect_col_exists(
data,
columns = required_columns
)

# ---- Data Preparation & Conditional Object Assignment ---

# if (any(str_detect(unique(data$estimate_type), pattern = "Zr"))) {
# # Must group by cols else multiple "effects_analysis" elements
# # get passed to fit_MA_mv()
# data <- data %>%
# group_by(estimate_type, #TODO can we just `ungroup()`?
# dataset,
# exclusion_set,
# publishable_subset,
# expertise_subset,
# collinearity_subset)
#
# } else {
# data <- data %>%
# group_by(estimate_type,
# dataset,
# exclusion_set) #TODO we need to merge the yi together for eucalyptus
# }

# Assign outcome_variable values
if (rlang::is_null(outcome_variable)) {
# Defaults
if (any(str_detect(unique(data$estimate_type), pattern = "Zr"))) {
outcome_variable <- "Zr"
outcome_SE <- "VZr"
} else {
outcome_variable <- "Z"
outcome_SE <- "VZ"
}
data <- data %>%
ungroup %>%
mutate(outcome_colname = outcome_variable,
outcome_SE_colname = outcome_SE)
} else {
MA_data <- MA_data %>%
group_by(estimate_type, dataset, exclusion_set)
# Argument supplied
matched_formulae_outcome <-
map(outcome_variable,
~ formulae_match(x = names(.x), y = .x))

matched_formulae_outcome_SE <-
map(outcome_SE,
~ formulae_match(x = names(.x), y = .x))

data <- map2(names(matched_formulae_outcome),
matched_formulae_outcome,
.f = ~ map_match_formulae(ungroup(data), .x, .y)) %>%
bind_rows() %>%
drop_na(outcome_colname) %>%
left_join(
{
map2(names(matched_formulae_outcome_SE),
matched_formulae_outcome_SE,
.f = ~ map_match_formulae(
{data %>%
ungroup %>%
select(names(outcome_SE))},
.x,
.y,
col_name = "outcome_SE_colname")) %>%
bind_rows() %>%
drop_na(outcome_SE_colname) %>%
distinct()
},
by = unique(names(outcome_SE))
)

}

# --- Fit Meta-Models & Create Plots ---

# ----- Fit Meta-Models & Create Plots -----
cli::cli_h1(text = "Meta-analysing Datasets")

out <-
MA_data %>%
dplyr::mutate(
Z_colname =
map_chr(
effects_analysis,
~ if (!is_null(.x)) {
dplyr::select(
.x,
starts_with("Z")
) %>%
colnames()
} else {
rlang::na_chr
}
),
VZ_colname =
map_chr(
effects_analysis,
~ if (!is_null(.x)) {
dplyr::select(
.x,
starts_with("VZ")
) %>%
colnames()
} else {
rlang::na_chr
}
)
) %>%
data %>%
dplyr::mutate(
effects_analysis =
purrr::pmap(
.l = list(
effects_analysis,
Z_colname,
VZ_colname
outcome_colname,
outcome_SE_colname
),
.f = rm_inf_na
)
) %>%
dplyr::mutate(
MA_mod =
purrr::pmap(
.l = list(effects_analysis, Z_colname, VZ_colname, estimate_type),
.l = list(effects_analysis,
outcome_colname,
outcome_SE_colname,
estimate_type),
.f = fit_MA_mv
),
effects_analysis =
ifelse(is.na(MA_mod),
NA,
purrr::map2(
.x = effects_analysis,
.y = MA_mod,
.f = ~ calculate_deviation_score(.x, .y)
)
NA,
purrr::map2(
.x = effects_analysis,
.y = MA_mod,
.f = ~ calculate_deviation_score(.x, .y)
)
),
effects_analysis =
ifelse(rlang::is_na(effects_analysis),
NA,
purrr::map2(
.x = effects_analysis,
.y = dataset,
.f = ~ box_cox_transform(.x, .y)
)
NA,
purrr::map2(
.x = effects_analysis,
.y = dataset,
.f = ~ box_cox_transform(.x, .y)
)
),
sorensen_glm =
purrr::map(
Expand Down Expand Up @@ -155,44 +230,46 @@ meta_analyse_datasets <- function(MA_data, filter_vars = NULL) {
)
)
)

# --- Fit Multivariate Models ---
# TODO apply same strategy below for filtering any other models out that we might not want
if (!is.null(filter_vars)) { # TODO check if this is the correct way to check for NULL

# TODO apply same strategy below for filtering any other models out that we might not want to fit
if (!rlang::is_null(filter_vars)) {
filter_var_syms <- map(filter_vars, ~ rlang::call_args(.x) %>% pluck(1))

filter_var_names <- filter_var_syms %>%
map(rlang::quo_name) %>%
purrr::list_c()

multivar_mods <-
out %>%
dplyr::filter(!!!filter_vars) %>%
group_by(dataset, !!!filter_var_syms) %>% # retain grouping, but add additional dataset grouping
mutate(effects_analysis = map(effects_analysis, ~ .x %>%
unnest(review_data))) %>%
unnest(review_data))) %>%
mutate(
MA_mod_mv = map(effects_analysis, fit_multivar_MA),
.keep = "none"
) # retain output and grouping cols only

by <- join_by("dataset", !!!filter_var_names)

out <- left_join(out, multivar_mods, by) %>%
select(-ends_with("_colname"))
} else {

out <- out %>%
mutate(effects_analysis = map(effects_analysis, ~ .x %>%
unnest(review_data))) %>%
unnest(review_data))) %>%
mutate(MA_mod_mv = map(effects_analysis, fit_multivar_MA)) %>%
select(-ends_with("_colname"))
}

out <-
out %>% # replace any NULL values with NA
mutate(across(everything(),
.fns = ~ coalesce(.x, list(NA))
.fns = ~ coalesce(.x, list(NA))
))

return(out)
}

0 comments on commit bdf5375

Please sign in to comment.