diff --git a/.DS_Store b/.DS_Store index 56e92df7..72a46ace 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/DESCRIPTION b/DESCRIPTION index e480c88f..6334271b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,6 @@ LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Imports: - dplyr, WeightIt, marginaleffects, stats, @@ -38,8 +37,9 @@ Imports: gtools, cobalt, stringr, - mitml, - psych + huxtable, + officer, + flextable Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 060b9adb..e5760d91 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,12 +17,4 @@ export(get_reference_values) export(make_love_plot) export(perform_multiple_comparison_correction) export(trimWeights) -import(ggplot2) -import(stats) -import(utils) -importFrom(WeightIt,weightitMSM) -importFrom(dplyr,"%>%") -importFrom(dplyr,mutate) -importFrom(dplyr,select) -importFrom(gtools,permutations) -importFrom(stringr,str_count) +importFrom(survey,svyglm) diff --git a/R/assessBalance.R b/R/assessBalance.R index 92ffd41c..66511732 100644 --- a/R/assessBalance.R +++ b/R/assessBalance.R @@ -5,7 +5,6 @@ #' guidelines from Jackson, 2016 on how to assess balance for time-varying #' exposures. #' -#' @importFrom dplyr %>% #' @seealso {[cobalt] package, #' } #' @seealso {Jackson, 2016 for more on assessing balance for time-varying @@ -18,8 +17,6 @@ #' @param exposure_time_pts list of integers at which weights will be #' created/assessed that correspond to time points when exposure wass measured #' @param outcome name of outcome variable with ".timepoint" suffix -#' @param tv_confounders list of time-varying confounders with ".timepoint" -#' suffix #' @param formulas list of balancing formulas at each time point output from #' createFormulas() #' @param weights list of IPTW weights output from createWeights, required for @@ -62,7 +59,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "prebalance", #' formulas = f, #' save.out = FALSE) @@ -70,7 +66,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "prebalance", #' formulas = f, #' balance_thresh = 0.2, @@ -79,7 +74,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "prebalance", #' formulas = f, #' balance_thresh = c(0.1, 0.2), @@ -97,7 +91,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "weighted", #' weights = w, #' formulas = f, @@ -106,7 +99,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "weighted", #' weights = w, #' formulas = f, @@ -116,7 +108,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "weighted", #' weights = w, #' formulas = f, @@ -126,8 +117,9 @@ -assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights = NULL, balance_thresh = 0.1, - imp_conf = NULL, verbose = TRUE, save.out = TRUE) { + +assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights = NULL, balance_thresh = 0.1, + imp_conf = NULL, verbose = TRUE, save.out = TRUE){ if (!is.logical(save.out)) { stop("`save.out` must be a flag (TRUE or FALSE)", call. = FALSE) @@ -136,6 +128,9 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } @@ -145,26 +140,59 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } - if (missing(exposure)) { + + else if (!inherits(data, "mids") && !is.data.frame(data) && + !(is.list(data) && all(vapply(data, is.data.frame, logical(1L))))) { + stop("Please provide either a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE) + } + else if(is.list(data) && !is.data.frame(data)){ + if (sum(sapply(data, is.data.frame)) != length(data)){ + stop("Please supply a list of data frames that have been imputed.", call. = FALSE) + } + } + + if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } - if (missing(outcome)) { + else if(!is.character(exposure) | length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + + if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } - if (missing(exposure_time_pts)) { - stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) + else if(!is.character(outcome) | length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) } - if (missing(tv_confounders)) { - stop("Please supply a list of time-varying confounders.", call. = FALSE) + + if (missing(exposure_time_pts)){ + stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } - if (missing(type)) { - stop("Please supply a 'weighted', 'prebalance' type", call. = FALSE) + else if(!is.numeric(exposure_time_pts)){ + stop("Please supply a list of exposure time points as integers.", call. = FALSE) } - if (missing(formulas)) { + + if (missing(formulas)){ stop("Please supply a list of balancing formulas.", call. = FALSE) } + else if(!is.list(formulas) | is.data.frame(formulas)){ + stop("Please provide a list of formulas for each exposure time point", call. = FALSE) + } + else if(length(formulas) != length(exposure_time_pts)){ + stop("Please provide a list of formulas for each exposure time point", call. = FALSE) + } + else if(is.list(formulas) && !is.data.frame(formulas)){ + if (sum(sapply(formulas, function(x) { + inherits(x, "formula")})) != length(formulas)){ + stop("Please supply a list of formulas for each exposure time point.", call. = FALSE) + } + } - if (length(type) != 1 || !is.character(type)) { + + if (missing(type)){ + stop("Please supply a 'weighted', 'prebalance' type", call. = FALSE) + } + if (!inherits(type, "character") || length(type) != 1 ){ stop("Please provide a single type as a character string from the following list: 'prebalance', 'weighted'", call. = FALSE) } else if (!type %in% c("prebalance", "weighted")) { @@ -182,34 +210,51 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("Please provide either a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE) } - if (!is.null(weights) && !is.list(weights)) { + if (!is.null(weights) && (!is.list(weights) || is.data.frame(weights))){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } + else if(is.list(weights) && !is.data.frame(weights)){ + if (sum(sapply(weights, function(x) { + inherits(x, "weightitMSM")})) != length(weights)){ + stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + } + } if (!is.numeric(balance_thresh)) { stop("Please provide one or two balance thresholds as numbers from 0-1.") } - if (length(balance_thresh) == 2 && is.null(imp_conf)) { + + if (length(balance_thresh) == 2 && is.null(imp_conf)){ stop("If you wish to provide different balance threshold for important and less important confounders, please provide a list of important confounders in the 'imp_conf' field.", call. = FALSE) } - if (!is.null(imp_conf) && length(balance_thresh) == 1) { + if (!is.null(imp_conf) && length(balance_thresh) == 1){ stop("If you provide a list of important confounders, please provide a list of two balance thresholds for important and less important confounders, respectively", call. = FALSE) } - else if (!is.null(imp_conf) && !is.character(imp_conf)) { + else if(!is.null(imp_conf) && !is.character(imp_conf)){ stop("Please provide a list variable names as characters that are important confounders.", call. = FALSE) } - if (!is.list(formulas)) { - stop("Please provide a list of formulas for each exposure time point", call. = FALSE) + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) + } + + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) + } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) } mi <- !is.data.frame(data) folder <- switch(type, "prebalance" = "prebalance/", "weighted/") - if (save.out) { - #creating required directories + + if (save.out){ balance_dir <- file.path(home_dir, "balance") if (!dir.exists(balance_dir)) { dir.create(balance_dir) @@ -264,7 +309,8 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, }) } else { - m <- length(data) + + m = length(data) bal_stats <- lapply(seq_len(m), function(k) { d <- data[[k]] @@ -289,10 +335,11 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, bal_stats_all_imp <- do.call(dplyr::bind_rows, bal_stats) bal_stats_all_imp <- bal_stats_all_imp[order(bal_stats_all_imp$covariate), ] - if (save.out) { + + if (save.out){ write.csv(bal_stats_all_imp, sprintf("%s/balance/%s/%s_all_imps_balance_stat_summary.csv", home_dir, type, exposure)) - if (verbose) { + if (verbose ){ cat("Pre balance statistics for each imputed dataset have now been saved in the 'balance/prebalance/' folder", "\n") } @@ -309,25 +356,24 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, avg_bal = rowMeans(do.call(cbind, lapply(bal_stats, `[[`, "std_bal_stats")))) #adds custom bal thresh info - if (!is.null(imp_conf)) { + + if (!is.null(imp_conf)){ + all_bal_stats$bal_thresh <- ifelse(all_bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]) - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, - # balance_thresh[1], balance_thresh[2])) - all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) - # all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - # all_bal_stats$bal_thresh, 1, 0) ) + all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < + all_bal_stats$bal_thresh, 1, 0) } - else { + else{ all_bal_stats$bal_thresh <- balance_thresh - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh) - all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - # all_bal_stats$bal_thresh, 1, 0) ) + + all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < + all_bal_stats$bal_thresh, 1, 0) } - if (verbose) { + + if (verbose){ cat("\n") cat(paste0("*** Averaging Across All Imputations ***"), "\n") } @@ -336,9 +382,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, } else { - # if (!is.data.frame(data)){ - # stop("Please provide data as a data frame, mids object, or list.", call. = FALSE) - # } + if (sum(duplicated(data[["ID"]])) > 0){ stop("Please provide wide dataset with a single row per ID.", call. = FALSE) } @@ -425,10 +469,10 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, bal_stats_all_imp <- do.call(dplyr::bind_rows, bal_stats) bal_stats_all_imp <- bal_stats_all_imp[order(bal_stats_all_imp$covariate), ] - if (save.out) { - write.csv(bal_stats_all_imp, sprintf("%s/balance/%s/%s_all_imps_balance_stat_summary.csv", + if (save.out){ + write.csv(bal_stats_all_imp, sprintf("%s/balance/%s/%s_all_imps_balance_stat_summary.csv", home_dir, type, exposure)) - if (verbose) { + if (verbose){ cat("Weighted balance statistics for each imputed dataset have now been saved in the 'balance/weighted/' folder", "\n") } } @@ -443,21 +487,19 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, avg_bal = rowMeans(do.call(cbind, lapply(bal_stats, `[[`, "std_bal_stats")))) # #adds custom bal thresh info - if (!is.null(imp_conf)) { - all_bal_stats$bal_thresh <- ifelse(all_bal_stats$covariate %in% imp_conf, - balance_thresh[1], balance_thresh[2]) - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, - # balance_thresh[1], balance_thresh[2])) - all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) - # all_bal_stats <- all_bal_stats %>%dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - # all_bal_stats$bal_thresh, 1, 0) ) + if (!is.null(imp_conf)){ + + all_bal_stas$bal_thresh <-ifelse(all_bal_stats$covariate %in% imp_conf, + balance_thresh[1], balance_thresh[2]) + + all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < + all_bal_stats$bal_thresh, 1, 0) } else{ all_bal_stats$bal_thresh <- balance_thresh - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh) - all_bal_stats$balanced = ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) - # all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - # all_bal_stats$bal_thresh, 1, 0) ) + + all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < + all_bal_stats$bal_thresh, 1, 0) } if (verbose) { @@ -468,6 +510,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, tot_covars <- sapply(strsplit(all_bal_stats$covariate, "\\."), `[`, 1) } else { + if (sum(duplicated(data$"ID")) > 0){ stop("Please provide wide dataset with a single row per ID.", call. = FALSE) } @@ -504,6 +547,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (inherits(data, "mids")) { exposure_type <- if (is.numeric(mice::complete(data, 1)[, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" + } else if (!is.data.frame(data)) { exposure_type <- if (is.numeric(data[[1]][, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" @@ -512,27 +556,28 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, exposure_type <- if (is.numeric(data[[paste0(exposure, '.', exposure_time_pts[1])]])) "continuous" else "binary" } - temp <- all_bal_stats[all_bal_stats$exp_time == exposure_time_pt,, drop = FALSE] - # temp <- all_bal_stats %>% filter(exp_time == exposure_time_pt) + + temp <- all_bal_stats[all_bal_stats$exp_time == exposure_time_pt, , drop = FALSE] make_love_plot(home_dir, folder, exposure, exposure_time_pt, exposure_type, k = 0, form_name, temp, data_type, balance_thresh, weights_method, imp_conf, verbose, save.out) }) - - if (save.out) { - # Save out all correlations/std mean differences + if (save.out){ outfile <- sprintf("%s/balance/%s/%s-%s_all_%s_%s_associations.html", home_dir, type, exposure, outcome, type, weights_method) + + # Save out all correlations/std mean differences sink(outfile) stargazer::stargazer(all_bal_stats, type = "html", digits = 2, column.labels = colnames(all_bal_stats), summary = FALSE, rownames = FALSE, header = FALSE, out = outfile) sink() - if (verbose) { - if (mi) { + + if (verbose){ + if (mi){ cat(sprintf("Summary plots for %s %s averaged across all imputations have been saved out for each time point in the 'balance/%s/plots/' folder.\n", - form_name, exposure, type)) + form_name, exposure, type)) } else { cat(sprintf("Summary plots for %s %s have now been saved out for each time point in the 'balance/%s/plots/' folder.\n", @@ -547,7 +592,8 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, home_dir, type, exposure, type, weights_method), row.names = FALSE) - if (verbose) { + + if (verbose){ if (mi) { cat(sprintf("Check 'balance/%s/' folder for a table of all %s correlations or standardized mean differences averaged across imputed datasets.\n", @@ -563,17 +609,17 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, # Finding all imbalanced variables - unbalanced_covars <- all_bal_stats[all_bal_stats$balanced == 0,, drop = FALSE] - # unbalanced_covars <- all_bal_stats %>% - # filter(balanced == 0) + + unbalanced_covars <- all_bal_stats[all_bal_stats$balanced == 0, , drop = FALSE] + unbalanced_constructs <- sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1)[!duplicated(sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1))] - if (verbose) { + if (verbose){ if (mi) { cat(sprintf("USER ALERT: Averaging across all imputed datasets for exposure %s using the %s formulas and %s :\n", - exposure, form_name, weights_method)) + exposure, form_name, weights_method)) cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s-%s).\n", round(median(abs(all_bal_stats$avg_bal)), 2), @@ -592,34 +638,48 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, round(median(abs(as.numeric(unbalanced_covars$avg_bal[unbalanced_covars$balanced == 0]))), 2), round(min(unbalanced_covars$avg_bal), 2), round(max(unbalanced_covars$avg_bal), 2))) - # cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ", - # length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", - # length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), - # "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", - # exposure, " of ", round(median(abs(as.numeric(unlist(unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% - # dplyr:: select(avg_bal))))), 2), " (range=", - # round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") } else{ cat("There are no imbalanced covariates.", "\n") } } else { - #I'm going to let you finish the sprintf() conversion :) - cat(paste0("USER ALERT: For exposure ", exposure, " using the ", form_name," formulas and ", weights_method, " :"), "\n") - - cat(paste0("The median absolute value relation between exposure and confounder is ", - round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ", - round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n") + # cat(paste0("USER ALERT: For exposure ", exposure, " using the ", form_name," formulas and ", weights_method, " :"), "\n") + cat(sprintf("USER ALERT: For exposure %s using the %s formulas and %s :\n", + exposure, form_name, weights_method)) + + # cat(paste0("The median absolute value relation between exposure and confounder is ", + # round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ", + # round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n") + cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s -%s). \n", + round(median(abs(all_bal_stats$avg_bal)), 2), + round(min(all_bal_stats$avg_ba), 2), + round(max(all_bal_stats$avg_ba), 2))) if (nrow(unbalanced_covars) > 0){ - cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ", - length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", - length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), - "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", - exposure, " of ", round(median(abs(as.numeric(unlist(unbalanced_covars %>% subset(unbalanced_covars$balanced == 0) %>% - dplyr::select(avg_bal))))), 2), " (range=", - round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") + # cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ", + # length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", + # length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), + # "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", + # exposure, " of ", round(median(abs(as.numeric(unlist( + # # unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% + # # dplyr:: select(avg_bal) + # unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"] + # + # )))), 2), " (range=", + # round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") + + cat(sprintf("As shown below, the following %s covariates across time points out of %s total (%s%%) spanning %s domains out of %s (%s%%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to %s of %s (range=%s-%s) :\n", + nrow(unbalanced_covars), + length(tot_covars), + round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), + length(unbalanced_constructs), + length(tot_cons), + round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), + exposure, + round(median(abs(as.numeric(unlist(unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"])))), 2), + round(min(unbalanced_covars$avg_bal), 2), + round(max(unbalanced_covars$avg_bal), 2))) } else{ cat("There are no imbalanced covariates.", "\n") @@ -637,9 +697,12 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (save.out) { # Save out only imbalanced covariates - sink(paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_",type,"_", weights_method, "_all_covariates_imbalanced.html")) - stargazer(unbalanced_covars, type = "html", digits = 2, column.labels = colnames(unbalanced_covars), summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_", type, "_", weights_method, "_all_covariates_imbalanced.html")) + outfile <- sprintf("%s/balance/%s/%s-%s_%s_%s_all_covariates_imbalanced.html", + home_dir, type, exposure, outcome, type, weights_method) + + sink(outfile) + stargazer::stargazer(unbalanced_covars, type = "html", digits = 2, column.labels = colnames(unbalanced_covars), summary = FALSE, rownames = FALSE, header = FALSE, + out = outfile) sink() } } diff --git a/R/calcBalStats.R b/R/calcBalStats.R index 37e49c74..cb84433e 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -78,16 +78,18 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = 0, weights = NULL, imp_conf = NULL, verbose = TRUE, save.out = TRUE){ - if(!is.list(formulas)) { + + if(!is.list(formulas) | is.data.frame(formulas)){ stop("Please provide a list of formulas for each exposure time point", call. = FALSE) } - if (!is.null(weights) & !inherits(weights, "weightitMSM")){ + if (!is.null(weights) && !inherits(weights, "weightitMSM")){ stop("Please supply a list of weights output from the createWeights function (via WeightIt::WeightItMSM).", call. = FALSE) } form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[", 1) - exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), "continuous", "binary") - weighted <- !is.null(weights) + + exposure_type <- if(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric")) "continuous" else "binary" + weighted <- if(!is.null(weights)) 1 else 0 factor_covariates <- colnames(data)[sapply(data, is.factor)] factor_covariates <- setdiff(factor_covariates, "ID") @@ -101,14 +103,23 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ weights_method <- "no weights" } + folder <- if (weighted) "weighted/" else "prebalance/" data_type <- if (k == 0) "single" else "imputed" - if (data_type == "imputed" && verbose) { - cat(sprintf("**Imputation %s**\n", k)) + if (data_type == "imputed" && verbose){ + cat(paste0("**Imputation ", k, "**"), "\n") } + #split factors + if(length(names(data)[sapply(data, class ) == "factor"]) > 0){ + data$"ID" = as.numeric(data$"ID") + data2 <- cobalt::splitfactor(data, names(data)[sapply(data, class ) == "factor"], drop.first = FALSE ) + } + else{ + data2 <- data + } #creating initial data frames #data frame with all sampling weights for all exposures at all exposure time points for all histories @@ -131,9 +142,18 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ covars <- gsub(" ", "", covars) + if(length(names(data)[sapply(data, class ) == "factor"]) > 0){ + + #making factor covars separate variables + data_cov <- data[, covars] + data_cov <- cobalt::splitfactor(data_cov, names(data_cov)[sapply(data_cov, class ) == "factor"], drop.first = FALSE ) + covars <- colnames(data_cov) + } + + # GETTING BALANCE STATS FOR T=1 W/ NO EXPOSURE HISTORY (ok to standardize immediately) if (length(lagged_time_pts) == 0) { - temp <- data + temp <- data2 # Unweighted pre-balance checking if (!weighted) { @@ -201,7 +221,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ & data$flag == flag, t, NA) # finding those w/ vals <= median exp @ time pt & flagged at prev t's } else { # for binary exp - data$flag <- ifelse(data[, exps_time[t]] == 0 & data$flag == flag, t, NA) # if exposure is absent & flagged at prev t's + data$flag <- ifelse(data[, exps_time[t]] == 0 & data$flag == flag, t , NA) # if exposure is absent & flagged at prev t's } } @@ -212,6 +232,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ } else { # binary exp data$flag <- ifelse(data[[exps_time[t]]] == 1 & data$flag == flag, t, NA) # if exposure is present & flagged at prev t's + } } @@ -226,29 +247,36 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ data$flag <- NULL # resets flag } # ends history loop (e.g., "l-l-l") - #summarize contributors to each history - prop_sum <- prop_weights %>% - dplyr::group_by(as.factor(history)) %>% - dplyr::summarize(n = dplyr::n()) + prop_sum <- aggregate(exposure ~ as.factor(history), data = prop_weights, + FUN = function(x) n = length(x)) # GET BALANCE STATISTICS FOR T>1 (when there is a history to weight on) if (length(lagged_time_pts) > 0) { + # Merge data with prop_weights by ID - temp <- merge(data, prop_weights, by = "ID", all.x = TRUE) + temp <- merge(data2, prop_weights, by = "ID", all.x = TRUE) #data with factors split up + # Removing any histories that only have 1 or 0 person contributing (cannot calc bal stats) if (sum(prop_sum$n == 1) > 0 || sum(prop_sum$n == 0) > 0) { - ommitted_histories <- as.character(as.data.frame(prop_sum)[prop_sum$n == 1, 1]) + + ommitted_histories <- as.character(as.data.frame(prop_sum)[prop_sum$n == 1 | prop_sum$n == 0, 1]) if (data_type == "imputed"){ - cat(paste0("USER ALERT: the following history/histories, ", ommitted_histories, - ", has/have been omitted from balance checking for exposure ", exposure, - ", imputation ", k, ", at time point ", exposure_time_pt)) + # cat(paste0("USER ALERT: the following history/histories, ", ommitted_histories, + # ", has/have been omitted from balance checking for exposure ", exposure, + # ", imputation ", k, ", at time point ", exposure_time_pt)) + cat(sprintf("USER ALERT: the following history/histories, %s has/have been omitted from + balance checking for exposure %s imputation %s at time point %s:", + omitted_histories, exposure, k, exposure_time_pt)) } else{ - cat(paste0("USER ALERT: the following history/histories, ", ommitted_histories, - ", has/have been omitted from balance checking for exposure ", exposure, - " at time point ", exposure_time_pt)) + # cat(paste0("USER ALERT: the following history/histories, ", ommitted_histories, + # ", has/have been omitted from balance checking for exposure ", exposure, + # " at time point ", exposure_time_pt)) + cat(sprintf("USER ALERT: the following history/histories, %s has/have been omitted from + balance checking for exposure %s at time point %s:", + omitted_histories, exposure, exposure_time_pt)) } temp <- temp[!temp$history %in% ommitted_histories, ] @@ -258,10 +286,14 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (!weighted) { # no IPTW weighting but weighting on history if (exposure_type == "continuous") { bal_stats <- sapply(sort(unique(temp$history)), function(i) { # finding balance by history - temp2 <- temp[temp$history == i,, drop = FALSE] - # finding covariance + temp2 <- temp[temp$history == i, , drop = FALSE ] + cobalt::col_w_cov(temp2[covars], temp2[[paste0(exposure, ".", exposure_time_pt)]], std = FALSE) + + # #should be same length as covars (already have factors split up) + # cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance + # subset = temp2$history[temp2$history == i] == i) # subsetting by that history }) # getting weighted mean across histories (cols), weighting by proportion of those w/ that same history @@ -271,33 +303,32 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) - # standardizing balance statistics after weighting by history + bal_stats$std_bal_stats <- weighted_bal_stats / - (sapply(seq(ncol(data[, covars])), function(x) { #issue: looking in data for unweighted vals but factors have additional vars - sd(as.numeric(data[, covars][, x]), na.rm = TRUE) }) *# unweighted covar sd + (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have additional vars + sd(as.numeric(data2[, rownames(bal_stats)[x]]), na.rm = TRUE) }) *# unweighted covar sd sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE)) # exposure SD at that time pt - # bal_stats <- bal_stats %>% - # dplyr::mutate(std_bal_stats = weighted_bal_stats / - # (sapply(seq(nrow(bal_stats)), function(x) { - # sd(as.numeric(data[, covars][, x]), na.rm = TRUE) }) *# unweighted covar sd - # sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) # exposure SD at that time pt - #temp error warning re: factor w/ multiple levels creating different numbers of variables per history --makes bal_stats a list and breaks std code if (!is.list(bal_stats)) { stop("There are unequal numbers of variables across histories, likely due to an ordinal variable with several levels denoted as a factor.", call. = FALSE) } - bal_stats <- bal_stats[grepl("std", names(bal_stats))] + + bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) + } #ends continuous else if (exposure_type == "binary") { bal_stats <- sapply(sort(unique(temp$history)), function(i) { - temp2 <- temp[temp$history == i,, drop = FALSE] - cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference - subset = temp2$history[temp2$history == i] == i) # subsetting by that history + temp2 <- temp[temp$history == i, , drop = FALSE ] + + cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE) # finding mean difference + + # cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference + # subset = temp2$history[temp2$history == i] == i) # subsetting by that history }) # getting weighted mean across histories, weighting by proportion of those w/ that same history @@ -308,16 +339,15 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after finding weighted balance stats - bal_stats <- bal_stats %>% - dplyr::mutate(std_bal_stats = weighted_bal_stats/ - sapply(seq(ncol(data[, covars])), function(x){ - sqrt(mean( #dividing by pool SD estimate (unadjusted) - var(as.numeric(data[data[, (colnames(data) %in% paste0(exposure, ".", - exposure_time_pts[1]))] == 1 , colnames(data) %in% covars[x]])), #treated var - var(as.numeric(data[data[, (colnames(data) == paste0(exposure, ".", - exposure_time_pts[1]))] == 0 , colnames(data) %in% covars[x]])) #untreated var - )) - })) + bal_stats$std_bal_stats <- weighted_bal_stats/ + sapply(seq(ncol(data[, covars])), function(x){ + sqrt(mean( #dividing by pool SD estimate (unadjusted) + var(as.numeric(data[data[, (colnames(data) %in% paste0(exposure, ".", + exposure_time_pts[1]))] == 1 , colnames(data) %in% covars[x]])), #treated var + var(as.numeric(data[data[, (colnames(data) == paste0(exposure, ".", + exposure_time_pts[1]))] == 0 , colnames(data) %in% covars[x]])) #untreated var + )) + }) #temp error warning re: factor w/ multiple levels creating different numbers of variables per history --makes bal_stats a list and breaks std code if(inherits(bal_stats, "list")){ @@ -328,8 +358,8 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 + bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) - bal_stats <- bal_stats[grepl("std", names(bal_stats))] } #ends binary } #ends weighted=0 @@ -342,8 +372,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- sapply(sort(unique(temp$history)), function(i) { temp2 <- temp[temp$history == i,, drop = FALSE] + cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance - subset = temp2$history[temp2$history == i] == i, # subsetting by that history + # subset = temp2$history[temp2$history == i] == i, # subsetting by that history weights = temp2[, "weights"]) # adding IPTW weights }) @@ -361,17 +392,20 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after weighting by history - bal_stats <- bal_stats %>% - dplyr::mutate(std_bal_stats = weighted_bal_stats / - (sapply(seq(ncol(data[, covars])), function(x) { - sd(as.numeric(data[, covars][, x]), na.rm = TRUE) # unweighted covar sd - }) * sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) + # bal_stats <- bal_stats %>% + bal_stats$std_bal_stats <- weighted_bal_stats / + (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have additional vars + sd(as.numeric(data2[, rownames(bal_stats)[x]]), na.rm = TRUE) }) *# unweighted covar sd + sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE)) # exposure SD at that time pt + # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 - bal_stats <- bal_stats %>% - dplyr::select(contains(c("std"))) + # bal_stats <- bal_stats %>% + # dplyr::select(contains(c("std"))) + bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) + } #ends continuous else if (exposure_type == "binary") { @@ -379,8 +413,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- sapply(sort(unique(temp$history)), function(i) { temp2 <- temp[temp$history == i,, drop = FALSE] + cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference - subset = temp2$history[temp2$history == i] == i, # subsetting by that history + # subset = temp2$history[temp2$history == i] == i, # subsetting by that history weights = temp2[, "weights"]) # adding IPTW weights }) @@ -398,16 +433,15 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after finding weighted balance stats - bal_stats <- bal_stats %>% - dplyr::mutate(std_bal_stats = weighted_bal_stats/ - sapply(seq(ncol(data[, covars])), function(x){ - sqrt(mean( #dividing by pool SD estimate (unadjusted) - var(as.numeric(data[data[, (colnames(data) %in% paste0(exposure, ".", - exposure_time_pts[1]))] == 1 , colnames(data) %in% covars[x]])), #treated var - var(as.numeric(data[data[, (colnames(data) == paste0(exposure, ".", - exposure_time_pts[1]))] == 0 , colnames(data) %in% covars[x]])) #untreated var - )) - })) + bal_stats$std_bal_stats <- weighted_bal_stats/ + sapply(seq(ncol(data[, covars])), function(x){ + sqrt(mean( #dividing by pool SD estimate (unadjusted) + var(as.numeric(data[data[, (colnames(data) %in% paste0(exposure, ".", + exposure_time_pts[1]))] == 1 , colnames(data) %in% covars[x]])), #treated var + var(as.numeric(data[data[, (colnames(data) == paste0(exposure, ".", + exposure_time_pts[1]))] == 0 , colnames(data) %in% covars[x]])) #untreated var + )) + }) # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 @@ -415,9 +449,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 - - bal_stats <- bal_stats %>% - dplyr::select(contains(c("std"))) + bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) } #ends binary } #ends weighted @@ -431,9 +463,23 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(bal_stats) bal_stats$covariate <- rownames(bal_stats) - # Renames factor covariates - bal_stats$covariate[sapply(strsplit(bal_stats$covariate, "_"), "[", 1) %in% factor_covariates] <- - sapply(strsplit(bal_stats$covariate, "_"), "[", 1)[sapply(strsplit(bal_stats$covariate, "_"), "[", 1) %in% factor_covariates] + #averages across factor levels to create one bal stat per factor variable? + data$ID <- as.numeric(data$ID) + f_vars <- colnames(data)[sapply(data, is.factor)] + + if(length(f_vars) > 0){ + f_stats <- bal_stats[rownames(bal_stats)[sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ] + f_stats$name <- sapply(strsplit(rownames(f_stats), "_"), "[", 1) + test <- aggregate(std_bal_stats ~ name, data = f_stats, + FUN = function(x) new = mean(x)) + + colnames(test) <- c("covariate", "std_bal_stats") + new <- data.frame(std_bal_stats = test$std_bal_stats, + covariate = test$covariate) + rownames(new) <- new$covariate + bal_stats <- rbind(bal_stats[rownames(bal_stats)[!sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ], + new) + } #adds custom bal thresh info if (!is.null(imp_conf)){ @@ -443,6 +489,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ else{ bal_stats$bal_thresh <- balance_thresh bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) + } bal_stats$exposure <- exposure @@ -461,45 +508,58 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (verbose & save.out) { if (data_type == "imputed"){ - cat(paste0("For each time point and imputation, ", gsub("/", "", folder), " summary plots for ", - form_name, " formulas weighting method ", - weights_method, " have now been saved in the '", folder, "plots/' folder."), "\n") + + cat(paste0("For each time point and imputation, %s summary plots for %s + formulas weighting method %s have now been saved in the %s plots/' folder.\n", + gsub("/", "", folder), form_name, weights_method, folder)) + } - else {cat(paste0(" For each time point, ", gsub("/", "", folder), " summary plots for ", - form_name, " formulas and weighting method ", - weights_method, " have now been saved in the '", folder, "plots/' folder."), "\n") + else { + + cat(paste0("For each time point, %s summary plots for %s + formulas weighting method %s have now been saved in the %s plots/' folder.\n", + gsub("/", "", folder), form_name, weights_method, folder)) } } # Summarizing balance - bal_summary_exp <- all_bal_stats %>% - dplyr::group_by(exp_time) %>% - dplyr::summarize(balanced_n = sum(balanced == 1), # Tallying balanced covariates - imbalanced_n = sum(balanced == 0), # Tallying imbalanced covariates - n = dplyr::n()) + bal_summary_exp <- as.data.frame(aggregate(balanced ~ exp_time, data = all_bal_stats, + FUN = function(x) c(balanced_n = sum(x == 1), + imbalanced_n = sum(x == 0), + n = length(x)) )) + bal_summary_exp <- do.call(data.frame, bal_summary_exp) + colnames(bal_summary_exp) <- c("exp_time", "balanced_n", "imbalanced_n", "n") if (save.out){ - write.csv(bal_summary_exp, paste0(home_dir, "/balance/", folder, form_name, "_", exposure, "_", k, "_", - weights_method, "_balance_stat_summary.csv")) - write.csv(all_prop_weights, paste0(home_dir, "/balance/", folder, "/", form_name, "_form_", exposure, "_", k, - "_", weights_method, "_history_sample_weight.csv")) + write.csv(bal_summary_exp, + sprintf("%s/balance/%s%s_%s_%s_%s_balance_stat_summary.csv", + home_dir,folder, form_name, exposure, k, weights_method)) + + write.csv(all_prop_weights, + sprintf("%s/balance/%s/%s_form_%s_%s_%s_history_sample_weight.csv", + home_dir, folder, form_name, exposure, k, weights_method)) + if (verbose){ cat("\n") if (data_type == "imputed"){ - cat(paste0("Balance statistics using ", form_name, " formulas for ", exposure, ", imputation ", k, ", using ", - weights_method, " have been saved in the 'balance/", folder, "' folder"), "\n") - cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure, ", imputation ", k, - " have been saved in the 'balance/", folder, "' folder"), "\n") + cat(sprintf("Balance statistics using %s formulas for %s imputation %s, using + %s have been saved in the 'balance/%s' folder. \n", + form_name, exposure, k, weights_method, folder)) + + cat(sprintf("Sampling weights using the %s for %s imputation %s have been saved in the 'balance/%s' folder., \n", + form_name, exposure, k, folder)) } else{ - cat(paste0("Balance statistics using ", form_name, " formulas for ", exposure, "using ", - weights_method, " have been saved in the 'balance/", folder, "' folder"), "\n") - cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure, - " have been saved in the 'balance/", folder, "' folder"), "\n") + cat(sprintf("Balance statistics using %s formulas for %s using + %s have been saved in the 'balance/%s' folder. \n", + form_name, exposure, weights_method, folder)) + + cat(sprintf("Sampling weights using the %s for %s have been saved in the 'balance/%s' folder., \n", + form_name, exposure, folder)) } cat("\n") } @@ -508,52 +568,89 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # tallies total possible COVARIATES FROM FORM FOR ASSESSING BALANCE all_form <- as.data.frame(do.call(rbind, formulas)) tot_covars <- deparse(all_form[, 3], width.cutoff = 300) - tot_covars <- as.character(unlist(strsplit(tot_covars, "\\+")))[!grepl("form", as.character(unlist(strsplit(tot_covars, "\\+"))))] + tot_covars <- as.character(unlist(strsplit(tot_covars, "\\+")))[ + !grepl("form", as.character(unlist(strsplit(tot_covars, "\\+"))))] tot_covars <- gsub(" ", "", tot_covars) - tot_covars <- na.omit(sapply(strsplit(tot_covars, "\\."), "[", 1)[!duplicated(sapply(strsplit(tot_covars, "\\."), "[", 1))]) + tot_covars <- na.omit(sapply(strsplit(tot_covars, "\\."), "[", 1)[ + !duplicated(sapply(strsplit(tot_covars, "\\."), "[", 1))]) imbalanced_covars <- sum(bal_summary_exp$imbalanced_n, na.rm = TRUE) total_covars <- sum(bal_summary_exp$n, na.rm = TRUE) - percentage_imbalanced <- round((imbalanced_covars / total_covars) * 100, 0) - - remaining_imbalanced_domains <- length(sapply(strsplit(all_bal_stats[all_bal_stats$balanced == 0, "covariate"], "\\."), - "[", 1)[!duplicated(sapply(strsplit(all_bal_stats[all_bal_stats$balanced == 0, "covariate"], - "\\."), "[", 1))]) total_domains <- length(tot_covars) - # remaining_avg_abs_corr <- round(mean(abs(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"]), na.rm = TRUE), 2) - remaining_avg_abs_corr <- round(median(abs(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"]), na.rm = TRUE), 2) - remaining_corr_range <- paste0(round(min(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"], na.rm = TRUE), 2), - "-", round(max(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"], na.rm = TRUE), 2)) + if (imbalanced_covars > 0){ + percentage_imbalanced <- round((imbalanced_covars / total_covars) * 100, 0) + + remaining_imbalanced_domains <- length(sapply(strsplit(all_bal_stats[all_bal_stats$balanced == 0, "covariate"], "\\."), + "[", 1)[!duplicated(sapply(strsplit(all_bal_stats[all_bal_stats$balanced == 0, "covariate"], + "\\."), "[", 1))]) + + # remaining_avg_abs_corr <- round(mean(abs(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"]), na.rm = TRUE), 2) + remaining_avg_abs_corr <- round(median(abs(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"]), na.rm = TRUE), 2) + remaining_corr_range <- paste0(round(min(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"], na.rm = TRUE), 2), + "-", round(max(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"], na.rm = TRUE), 2)) + } if (verbose) { if (data_type == "imputed") { - cat(paste0("USER ALERT: For exposure ", exposure, " imputation ", k, " using ", weights_method, " and ", form_name, " formulas: "), "\n") - cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$std_bal_stats)), 2), " (range = ", - round(min(all_bal_stats$std_bal_stats), 2), "-", round(max(all_bal_stats$std_bal_stats), 2), ")."), "\n") + cat(sprintf("USER ALERT: For exposure %s imputation %s using %s and %s formulas: \n", + exposure, k, weights_method, form_name)) - cat(paste0("As shown below, ", imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, - "%) covariates across time points, corresponding to ", - remaining_imbalanced_domains, " out of ", total_domains, - " domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of ", - remaining_avg_abs_corr, " (range= ", remaining_corr_range, "):"), "\n") + cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s - %s).\n", + round(median(abs(all_bal_stats$std_bal_stats)), 2), + round(min(all_bal_stats$std_bal_stats), 2), + round(max(all_bal_stats$std_bal_stats), 2))) cat("\n") - cat(knitr::kable(bal_summary_exp, caption = paste0("Imbalanced Covariates for imputation ", k, " using ", - weights_method, " and ", form_name, " formulas"), format = 'pipe'), sep = "\n") + + if (imbalanced_covars > 0){ + cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %sout of %s domains, + remain imbalanced with a remaining median absolute value correlation/std mean difference of %s (range= %s):\n", + imbalanced_covars, + total_covars, + percentage_imbalanced, + remaining_imbalanced_domains, + total_domains, + remaining_avg_abs_corr, + remaining_corr_range)) + + cat("\n") + cat(knitr::kable(bal_summary_exp, + caption = sprintf("Imbalanced Covariates for imputation %s using %s and %s formulas", + k, weights_method, form_name), + format = 'pipe'), sep = "\n") + } + else{ + cat(sprintf("No covariates remain imbalanced for imputation %s using %s and %s formulas. \n", + k, weights_method, form_name)) + } + cat("\n") cat("\n") - } - else { - cat(paste0("As shown below, for exposure ", exposure, " using ", weights_method, ", and ", form_name, " formulas, ", - imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, "%) covariates across time points corresponding to ", - remaining_imbalanced_domains, " out of ", total_domains, - " domains remain imbalanced with a remaining average absolute value correlation/std mean difference of ", - remaining_avg_abs_corr, " (range= ", remaining_corr_range, ") :"), "\n") - cat("\n") - cat(knitr::kable(bal_summary_exp, caption = paste0("Imbalanced covariates using ", - weights_method, " and ", form_name, " formulas"), format = 'pipe'), sep = "\n") + + } else { + + if (imbalanced_covars > 0){ + cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %s out of %s domains, + remain imbalanced with a remaining median absolute value correlation/std mean difference of %s (range= %s):\n", + imbalanced_covars, + total_covars, + percentage_imbalanced, + remaining_imbalanced_domains, + total_domains, + remaining_avg_abs_corr, + remaining_corr_range)) + + cat("\n") + cat(knitr::kable(bal_summary_exp, + caption = sprintf("Imbalanced covariates using %s and %s formulas", weights_method, form_name), + format = 'pipe'), sep = "\n") + } + else{ + cat(sprintf("No covariates remain imbalanced using %s and %s formulas. \n", + weights_method, form_name)) + } cat("\n") cat("\n") } @@ -563,3 +660,4 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ all_bal_stats } + diff --git a/R/compareHelpers.R b/R/compareHelpers.R index a5a96cc7..2b0fd76c 100644 --- a/R/compareHelpers.R +++ b/R/compareHelpers.R @@ -17,9 +17,11 @@ #' reference = "h-h-h" ) get_reference_values <- function(d, reference) { + ref_vals <- sapply(seq_len(length(unlist(strsplit(reference, "-")))), function(x) { d[x, unlist(strsplit(reference, "-"))[x]] }) + ref_vals } @@ -41,11 +43,13 @@ get_reference_values <- function(d, reference) { #' r <- get_comparison_values(d = d, #' comp_histories = c("h-h-h", "h-h-l")) get_comparison_values <- function(d, comp_histories) { + comp_vals <- sapply(comp_histories, function(comp) { sapply(seq_len(length(unlist(strsplit(comp, "-")))), function(x) { d[x, unlist(strsplit(comp, "-"))[x]] }) }) + t(comp_vals) } @@ -60,6 +64,7 @@ get_comparison_values <- function(d, comp_histories) { #' @return contrasts #' @export create_custom_contrasts <- function(d, reference, comp_histories, exposure, preds) { + if (is.na(reference) | is.null(comp_histories)) { return(NULL) # Invalid input, return early } @@ -121,7 +126,8 @@ create_custom_comparisons <- function(preds, ref_vals, comp_vals, exposure) { #' @export add_histories <- function(p, d) { - if((is.list(p)) & length(p) == 1){ + + if((is.list(p)) && length(p) == 1){ history <- matrix(data = NA, nrow = nrow(p[[1]]), ncol = 1) # Get histories from the first element p <- p[[1]] } @@ -150,6 +156,7 @@ add_histories <- function(p, d) { } else { #comps for (i in seq_len(nrow(p))) { + temp <- as.character(p$term[i]) pair <- lapply(1:2, function(y) { a <- sapply(strsplit(temp, " - "), "[", y) @@ -209,7 +216,7 @@ perform_multiple_comparison_correction <- function(comps, reference, comp_histor cat("\n") cat(paste0("Conducting multiple comparison correction using the ", method, " method."), "\n") cat("\n") - corr_p <-stats::p.adjust(comps$p.value, method = method) + corr_p <- stats::p.adjust(comps$p.value, method = method) comps <- cbind(comps, p.value_corr = corr_p) } else { diff --git a/R/compareHistories.R b/R/compareHistories.R index 0eaa4d03..56689fa0 100644 --- a/R/compareHistories.R +++ b/R/compareHistories.R @@ -4,9 +4,6 @@ #' histories (pooling for imputed data), before conducting contrast comparisons #' (pooling for imputed data), correcting for multiple comparisons, and then #' plotting results. - -#' @importFrom gtools permutations -#' @importFrom stringr str_count #' @seealso {[marginaleffects::avg_predictions()], #' } #' @seealso {[marginaleffects::hypotheses()], @@ -18,8 +15,6 @@ #' @param exposure_time_pts list of integers at which weights will be #' created/assessed that correspond to time points when exposure wass measured #' @param outcome name of outcome variable with ".timepoint" suffix -#' @param tv_confounders list of time-varying confounders with ".timepoint" -#' suffix #' @param model list of model outputs from fitModel() #' @param epochs (optional) data frame of exposure epoch labels and values #' @param hi_lo_cut (optional) list of two numbers indicating quantile values @@ -85,13 +80,11 @@ #' r <- compareHistories(exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' model = m, #' save.out = FALSE) #' r <- compareHistories(exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' model = m, #' reference = "l-l-l", #' comparison = "h-h-h", @@ -99,14 +92,13 @@ #' r <- compareHistories(exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' model = m, #' reference = "l-l-l", #' comparison = c("h-h-h", "h-l-l"), #' save.out = FALSE) -compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, epochs = NULL, hi_lo_cut = NULL, +compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, model, epochs = NULL, hi_lo_cut = NULL, reference = NA, comparison = NULL, mc_comp_method = "BH", dose_level = "h", exp_lab = NA, out_lab = NA, colors = "Dark2", verbose = TRUE, save.out = TRUE ) { @@ -114,6 +106,9 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } @@ -122,21 +117,49 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } + else if(!is.character(exposure) || length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } + else if(!is.character(outcome) || length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } + if (missing(exposure_time_pts)){ stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } - if (missing(tv_confounders)){ - stop("Please supply a list of time-varying confounders.", call. = FALSE) + else if(!is.numeric(exposure_time_pts)){ + stop("Please supply a list of exposure time points as integers.", call. = FALSE) } + if (missing(model)){ stop("Please supply a list of model output", call. = FALSE) } - if(!inherits(model, "list")){ + else if(!is.list(model) || is.data.frame(model)){ stop("Please provide a list of model output from the fitModel function.", call. = FALSE) } + else if (sum(sapply(model, function(x) { + inherits(x, "svyglm")})) != length(model)){ + stop("Please supply a model as a list from the createWeights function.", call. = FALSE) + } + + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) + } + + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) + } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) + } + if(save.out){ @@ -150,8 +173,8 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ } } - exposure_type <- ifelse(inherits(model[[1]]$data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), - "continuous", "binary") + exposure_type <- if(inherits(model[[1]]$data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric")) + "continuous" else "binary" ints <- gsub(" ", "", as.character(unlist(strsplit(as.character(unlist(model[[1]]$terms)), "\\+")))) @@ -161,7 +184,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ #history inspection --done early bc function deals with epochs, etc. #print history sample distribution if (verbose){ - eval_hist(data = model[[1]]$data, exposure, tv_confounders, epochs, + eval_hist(data = model[[1]]$data, exposure, epochs, exposure_time_pts, hi_lo_cut, reference, comparison, verbose) } @@ -172,7 +195,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ data <- lapply(model, function(x) { x$data }) data <- do.call("rbind", data) } - if(!is.null(names(model))){ #single df (not imputed) + else if(!is.null(names(model))){ #single df (not imputed) data <- model[[1]]$data } @@ -182,7 +205,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ } else{ - if( !is.data.frame(epochs) | ncol(epochs) != 2 | sum(colnames(epochs) == c("epochs", "values")) != ncol(epochs)){ + if( !is.data.frame(epochs) || ncol(epochs) != 2 || sum(colnames(epochs) == c("epochs", "values")) != ncol(epochs)){ stop("If you supply epochs, please provide a dataframe with two columns of epochs and values.", call. = FALSE) } @@ -339,7 +362,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ } #conduct all pairwise comparisons if no ref/comparisons were specified by user - if (is.na(reference) & is.null(comp_histories)){ + if (is.na(reference) && is.null(comp_histories)){ # Pairwise comparisons; don't need to use custom class comps <- lapply(preds, function(y){ y |> marginaleffects::hypotheses("pairwise") @@ -371,8 +394,11 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ preds_pool <- add_dose(preds_pool, dose_level) # If the user specified reference and comparison groups, subset pred_pool for inspection and plotting - if (!is.na(reference) & !is.null(comp_histories)) { - preds_pool <- preds_pool[preds_pool$history %in% c(reference, comp_histories),] + + if (!is.na(reference) && !is.null(comp_histories)) { + # preds_pool <- preds_pool %>% + # dplyr::filter(history %in% c(reference, comp_histories)) + preds_pool <- preds_pool[preds_pool$history %in% c(reference, comp_histories), , drop = FALSE ] } @@ -380,9 +406,11 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ if(is.null(hi_lo_cut)){ hi_lo_cut <- "median+-.1" } + # Makes table of pooled average estimates and saves out - sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_estimated_means_hi_lo=", - paste(hi_lo_cut, collapse = "_"), ".html")) + outfile <- sprintf("%s/histories/%s-%s_pooled_estimated_means_hi_lo=%s.html", + home_dir, exposure, outcome, paste(hi_lo_cut, collapse = "_") ) + sink(outfile) stargazer::stargazer( as.data.frame(preds_pool), type = "html", @@ -391,9 +419,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_estimated_means_hi_lo=", - hi_lo_cut, ".html" - ) + out = outfile ) sink() } @@ -401,7 +427,10 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ if (verbose){ cat("\n") cat("Below are the pooled average predictions by user-specified history:") # - cat(knitr::kable(preds_pool, format = 'pipe', digits = 4), sep = "\n") + cat(knitr::kable(preds_pool, + format = 'pipe', + digits = 4), + sep = "\n") cat("\n") } @@ -433,15 +462,15 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ })) - - if (save.out){ if(is.null(hi_lo_cut)){ hi_lo_cut <- "median+-.1" } + # Makes table of pooled comparisons and saves out - sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_comparisons_hi_lo=", - paste(hi_lo_cut, collapse = "_"), ".html")) + outfile <- sprintf("%s/histories/%s-%s_pooled_comparisons_hi_lo=%s.html", + home_dir, exposure, outcome, paste(hi_lo_cut, collapse = "_") ) + sink(outfile) stargazer::stargazer( as.data.frame(comps_pool), type = "html", @@ -450,15 +479,17 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_comparisons_hi_lo=", - paste(hi_lo_cut, collapse = "_"), ".html") + out = outfile ) sink() } if (verbose){ cat("\n") cat(paste0("USER ALERT: please inspect the following pooled comparisons :"), "\n") - cat(knitr::kable(comps_pool, format = 'pipe', digits = 4), sep = "\n") + cat(knitr::kable(comps_pool, + format = 'pipe', + digits = 4), + sep = "\n") cat("\n") cat("\n") } @@ -477,8 +508,11 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ preds <- add_dose(preds, dose_level) # If the user specified reference and comparison groups, subset preds for inspection and plotting - if (!is.na(reference) & !is.null(comp_histories)) { - preds_pool <- preds_pool[preds_pool$history %in% c(reference, comp_histories),] + + if (!is.na(reference) && !is.null(comp_histories)) { + # preds <- preds %>% + # dplyr::filter(history %in% c(reference, comp_histories)) + preds <- preds[preds$history %in% c(reference, comp_histories), , drop = FALSE ] } @@ -486,10 +520,15 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ if(is.null(hi_lo_cut)){ hi_lo_cut <- "median+-.1" } + # Makes table of average estimates a lapply(seq_len(length(preds)), function(x) { y <- preds[[x]] - sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_estimated_means_hi_lo=", paste(hi_lo_cut, collapse = "_"), ".html")) + + outfile <- sprintf("%s/histories/%s-%s_estimated_means_hi_lo=%s.html", + home_dir, exposure, outcome, paste(hi_lo_cut, collapse = "_") ) + + sink(outfile) stargazer::stargazer( as.data.frame(y), type = "html", @@ -498,17 +537,20 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_estimated_means_hi_lo=", paste(hi_lo_cut, collapse = "_"), ".html" - ) + out = outfile ) sink() + }) } if (verbose){ cat("\n") cat("Below are the average predictions by user-specified history:", "\n") # Not sure if we need to print this? - cat(knitr::kable(preds, format = 'pipe', digits = 4), sep = "\n") + cat(knitr::kable(preds, + format = 'pipe', + digits = 4), + sep = "\n") cat("\n") } @@ -540,9 +582,12 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ if(is.null(hi_lo_cut)){ hi_lo_cut <- "median+-.1" } + + outfile <- sprintf("%s/histories/%s-%s_comparisons_hi_lo=%s.html", + home_dir, exposure, outcome, paste(hi_lo_cut, collapse = "_") ) + # Makes table of comparisons and saves out - sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_comparisons_hi_lo=", - paste(hi_lo_cut, collapse = "_"), ".html")) + sink(outfile) stargazer::stargazer( as.data.frame(comps), type = "html", @@ -551,17 +596,19 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_comparisons_hi_lo=", - hi_lo_cut, ".html" - ) + out = outfile ) sink() + } if (verbose) { cat("\n") cat(paste0("USER ALERT: please inspect the following comparisons:"), "\n") - cat(knitr::kable(comps, format = 'pipe', digits = 2), sep = "\n") + cat(knitr::kable(comps, + format = 'pipe', + digits = 2), + sep = "\n") cat("\n") cat("\n") } @@ -574,9 +621,13 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ if(!inherits(colors, "character")){ stop("Please provide a character string of a Brewer palette name or list of colors for plotting.", call. = FALSE) } - else if (length(colors) > 1 & length(colors) != nrow(epochs) + 1) { - stop(paste0('Please provide either: ', nrow(epochs) + 1, - ' different colors, a color palette, or leave this entry blank.'), call. = FALSE) + else if (length(colors) > 1 && length(colors) != nrow(epochs) + 1) { + stop( + # paste0('Please provide either: ', nrow(epochs) + 1, + # ' different colors, a color palette, or leave this entry blank.'), + sprint("Please provide either %s different colors, a Brewer color palette, or leave this entry blank. \n", + nrow(epochs) + 1), + call. = FALSE) } if (is.na(exp_lab)){ @@ -592,54 +643,63 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ comparisons$history <- as.factor(comparisons$history) comparisons$dose <- as.factor(comparisons$dose) - comparisons <- comparisons %>% - dplyr::arrange(dose) # Order by dose + # comparisons <- comparisons %>% + # dplyr::arrange(dose) # Order by dose + comparisons <- comparisons[order(comparisons$dose), , drop = FALSE] if (length(colors) > 1) { # If user input a list of colors - p <- ggplot2::ggplot(data = comparisons, aes(x = estimate, y = history, color = dose)) + + p <- ggplot2::ggplot(data = comparisons, ggplot2::aes(x = estimate, y = history, color = dose)) + ggplot2::geom_point(size = 5) + ggplot2::scale_color_manual(values = colors) + ggplot2::scale_y_discrete(limits = c(as.character(comparisons$history)), expand = c(0, 0.2)) + - ggplot2::geom_errorbarh(aes(xmin = low_ci, xmax = high_ci), height = 0.6) + + ggplot2::geom_errorbarh(ggplot2::aes(xmin = low_ci, xmax = high_ci), height = 0.6) + ggplot2::xlab(paste0("Predicted ", out_lab, " Value")) + ggplot2::ylab(paste0(exp_lab, " Exposure History")) + ggplot2::xlim(min(comparisons$low_ci) - 1 * sd(comparisons$low_ci), max(comparisons$high_ci) + 1 * sd(comparisons$high_ci)) + ggplot2::theme(text = ggplot2::element_text(size = 14)) + - ggplot2::theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), - panel.background = element_blank(), axis.line = ggplot2::element_line(colour = "black")) + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = element_blank(), + panel.background = ggplot2::element_blank(), axis.line = ggplot2::element_line(colour = "black")) if (save.out){ - ggplot2::ggsave(paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), plot = p) + ggplot2::ggsave( + # paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), + sprintf("%s/plots/%s-%s.jpeg", + home_dir, exposure, outcome), + plot = p) } if(verbose){ print(p) } } else { # If user lists a palette (default) - p <- ggplot2::ggplot(data = comparisons, aes(x = estimate, y = history, color = dose)) + + p <- ggplot2::ggplot(data = comparisons, ggplot2::aes(x = estimate, y = history, color = dose)) + ggplot2::geom_point(size = 5) + ggplot2::scale_colour_brewer(palette = colors) + ggplot2::scale_y_discrete(limits = c(as.character(comparisons$history)), expand = c(0, 0.2)) + - ggplot2::geom_errorbarh(aes(xmin = low_ci, xmax = high_ci), height = 0.6) + + ggplot2::geom_errorbarh(ggplot2::aes(xmin = low_ci, xmax = high_ci), height = 0.6) + ggplot2::xlab(paste0("Predicted ", out_lab, " Value")) + ggplot2::ylab(paste0(exp_lab, " Exposure History")) + ggplot2::xlim(min(comparisons$low_ci) - 1 * sd(comparisons$low_ci), max(comparisons$high_ci) + 1 * sd(comparisons$high_ci)) + ggplot2::theme(text = ggplot2::element_text(size = 14)) + - ggplot2::theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), - panel.background = element_blank(), axis.line = ggplot2::element_line(colour = "black")) + + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), axis.line = ggplot2::element_line(colour = "black")) + ggplot2::guides(fill = ggplot2::guide_legend(title="Dosage")) if (save.out){ - ggplot2::ggsave(paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), plot = p) + ggplot2::ggsave( + # paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), + sprintf("%s/plots/%s-%s.jpeg", + home_dir, exposure, outcome), + plot = p) } if(verbose){ print(p) } } - if (save.out & verbose){ + if (save.out && verbose){ cat("\n") cat("See the '/plots/' folder for graphical representations of results.") } diff --git a/R/createFormulas.R b/R/createFormulas.R index 3c79aa77..33e45648 100644 --- a/R/createFormulas.R +++ b/R/createFormulas.R @@ -10,9 +10,9 @@ #' @param exposure_time_pts list of integers at which weights will be #' created/assessed that correspond to time points when exposure wass measured #' @param outcome name of outcome variable with ".timepoint" suffix -#' @param tv_confounders list of time-varying confounders with ".timepoint" +#' @param tv_confounders (optional) list of time-varying confounders with ".timepoint" #' suffix -#' @param ti_confounders list of time invariant confounders +#' @param ti_confounders (optional) list of time invariant confounders #' @param type type of formula to create from 'full' (includes all lagged #' time-varying confounders), 'short' (includes time-varying confounders at #' t-1 lag only), or 'update' (adds to 'short' formulas any imbalanced @@ -73,7 +73,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "weighted", #' weights = w, #' formulas = f, @@ -89,13 +88,16 @@ #' save.out = FALSE) -createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, bal_stats = NULL, concur_conf = NULL, +createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = NULL, bal_stats = NULL, concur_conf = NULL, keep_conf = NULL, custom = NULL, verbose = TRUE, save.out = TRUE ){ if (save.out) { if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } @@ -104,59 +106,90 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_co if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } + else if(!is.character(exposure) || length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } + else if(!is.character(outcome) || length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } + if (missing(exposure_time_pts)){ stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } + else if(!is.numeric(exposure_time_pts)){ + stop("Please supply a list of exposure time points as integers.", call. = FALSE) + } + if (missing(tv_confounders)){ - stop("Please supply a list of time-varying confounders.", call. = FALSE) + warning("You have not specified any time-varying confounders. If you have time-varying exposure, please list all wide exposure variables as tv_confounders.", call. = FALSE) + tv_confounders <- character(0) } + else if(!is.character(tv_confounders)){ + stop("Please provide a list of time-varying confounders as character strings.") + } + if (missing(ti_confounders)){ - stop("Please supply a list of time invariant confounders.", call. = FALSE) + stop("You have not specified time invariant confounders.", call. = FALSE) + # ti_confounders <- NULL } + if (missing(type)){ stop("Please supply a 'full', 'short', or 'update' type", call. = FALSE) } - - if(!is.character(type)){ + else if(!is.character(type)){ stop("Please provide a character string type from the following list: 'short', 'full', or 'update'", call. = FALSE) } - if(! type %in% c("short", "full", "update")){ - stop("Please provide a type from the following list: 'short', 'full', or 'update'", call. = FALSE) + else if(! type %in% c("short", "full", "update") || length(type) != 1){ + stop("Please provide a single type from the following list: 'short', 'full', or 'update'", call. = FALSE) } - if (type != "update" & !is.null(bal_stats)){ + + if (type != "update" && !is.null(bal_stats)){ stop ("Please only provide balance statistics for the type 'update'.", call. = FALSE) } - if(!is.null(bal_stats) & !is.data.frame(bal_stats)){ + if(!is.null(bal_stats) && !is.data.frame(bal_stats)){ stop("Please provide a data frame of balance statistics from the assessBalance function.", call. = FALSE) } + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) + } + + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) + } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) + } + time_varying_covariates <- tv_confounders all_covars <- c(tv_confounders, ti_confounders) if (!is.null(custom)){ - if (length(custom) != length(exposure_time_pts) | !inherits(custom, "list")){ + if (length(custom) != length(exposure_time_pts) || !is.list(custom) || is.data.frame(custom)){ stop("If you wish to supply custom formulas, please provide a list of formulas for each exposure time point.", call. = FALSE) } forms <- custom } else{ - if (type != "update" & !is.null(bal_stats)){ + if (type != "update" && !is.null(bal_stats)){ stop ("Please only provide balance statistics for the type 'update'.", call. = FALSE) } if(save.out){ - #create parent directory forms_dir1 <- file.path(home_dir, "formulas") if (!dir.exists(forms_dir1)) { dir.create(forms_dir1) } - # Create type directory forms_dir <- file.path(home_dir, "formulas", type) if (!dir.exists(forms_dir)) { dir.create(forms_dir) @@ -174,59 +207,75 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_co #identifying lagged tv confounders relative to time point based on formula type if (type == "full"){ + if(verbose){ message("USER ALERT: Please manually inspect the full balancing formula below:") } + time_var_include <- time_varying_covariates[as.numeric(sapply(strsplit(time_varying_covariates, "\\."), "[", 2)) < time] + } else if (type == "short"){ + if(verbose){ message("USER ALERT: Please manually inspect the short balancing formula below that includes time-varying confounders at t-1 only:") } + time_var_include <- time_varying_covariates[as.numeric(sapply(strsplit(time_varying_covariates, "\\."), "[", 2)) == - exposure_time_pts[x-1]] + exposure_time_pts[x - 1]] + } else if (type == "update"){ + if(is.null(bal_stats)){ - stop("Please provide balance statistics if you wish to run the update version of this function", call. = FALSE) + stop("Please provide balance statistics from the assessBalance() function if you wish to run the update version of this function", + call. = FALSE) + } + else if (!is.data.frame(bal_stats)){ + stop("Please provide balance statistics from the assessBalance() function if you wish to run the update version of this function", + call. = FALSE) } if(verbose){ - message("USER ALERT: Please manually inspect the updated balancing formula below that includes time-varying confounders at t-1 and those greater at further lags that remained imbalanced:") + message("USER ALERT: Please manually inspect the updated balancing formula below that includes time-varying confounders at t-1 and + those greater at further lags that remained imbalanced:") } time_var_include <- time_varying_covariates[as.numeric(sapply(strsplit(time_varying_covariates, "\\."), "[", 2)) == - exposure_time_pts[x-1]] + exposure_time_pts[x - 1]] if (x > 1) { - new <- bal_stats %>% - dplyr::filter(balanced == 0) %>% - dplyr::filter(exp_time == time, as.numeric(covar_time) < exposure_time_pts[x - 1], - as.numeric(covar_time) > 0) %>% # Finds any lagged imbalanced covars - dplyr::select(covariate) + + new <- bal_stats[bal_stats$balanced == 0 & + bal_stats$exp_time == time & + as.numeric(bal_stats$covar_time) < exposure_time_pts[x - 1] & + as.numeric(bal_stats$covar_time) > 0, ] - # Renames factors (that were appended w/ level) if (nrow(new) > 0) { - new$covariate[sapply(strsplit(new$covariate, "_"), `[`, 1) %in% factor_covariates] <- - sapply(strsplit(new$covariate, "_"), `[`, 1)[sapply(strsplit(new$covariate, "_"), `[`, 1) %in% factor_covariates] + + new <- new[, "covariate"] new <- as.character(unlist(new)) time_var_include <- c(time_var_include, new) if (verbose){ - cat(paste0("For ", exposure, " at exposure time point ", time , - ", the following covariate(s) will be added to the short balancing formula: "), paste(new, collapse = ", "), "\n") + + cat(sprintf("For %s at exposure time point %s the following covariate(s) will be added to the short balancing formula: + %s \n", + exposure, time, paste(new, collapse = ", "))) + cat("\n") } } else{ if (verbose) { - cat(paste0("For ", exposure, " at exposure time point ", time , - " no time-varying confounders at additional lags were added."), "\n") + + cat(sprintf("For %s at exposure time point %s no time-varying confounders at additional lags were added. \n", + exposure, time )) cat("\n") } } @@ -276,15 +325,17 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_co # Prints form for user inspection if (verbose){ - cat(paste0("The ", type, " formula for ", exposure, "-", outcome, " at ", exposure, " time point ", - as.character(time), " is:"), "\n") + cat(sprintf("The %s formula for %s - %s at %s time point %s is: \n", + type, exposure, outcome, exposure, as.character(time))) print(f) cat("\n") } # Appends the form string to forms_csv - forms_csv <- c(forms_csv, paste0(type, " formula for ", exposure, "-", outcome, " at ", exposure, - " time point ", as.character(time), ":")) + forms_csv <- c(forms_csv, + sprintf("%s formula for %s-%s at %s time point %s:", + type, exposure, outcome, exposure, as.character(time))) + forms_csv <- c(forms_csv, paste(exposure, "~", paste0(vars_to_include[order(vars_to_include)], sep = "", collapse = " + "))) # Assigns the form to forms list @@ -293,11 +344,14 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_co if(save.out){ # Writes forms_csv to a CSV file - forms_csv_file <- paste0(forms_dir, "/", type, "_", exposure, "-", outcome, "_", type, "_balancing_formulas.csv") + forms_csv_file <- sprintf("%s/%s_%s-%s_%s_balancing_formulas.csv", + forms_dir, type, exposure, outcome, type) + writeLines(forms_csv, con = forms_csv_file) # writes to rds - forms_rds_file <- paste0(forms_dir, "/", type, "_", exposure, "-", outcome, "_", type, "_balancing_formulas.rds") + forms_rds_file <- sprintf("%s/%s_%s-%s_%s_balancing_formulas.rds", + forms_dir, type, exposure, outcome, type) saveRDS(forms, file = forms_rds_file) } diff --git a/R/createWeights.R b/R/createWeights.R index 978b5d34..94b4f864 100644 --- a/R/createWeights.R +++ b/R/createWeights.R @@ -5,7 +5,6 @@ #' relevant confounders. #' #' @export -#' @importFrom WeightIt weightitMSM #' @seealso {[WeightIt::weightitMSM()], #' } #' @param home_dir path to home directory (required if 'save.out' = TRUE) @@ -68,6 +67,9 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } @@ -77,35 +79,63 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } + else if (!inherits(data, "mids") && !is.data.frame(data) && !is.list(data)) { + stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", + call. = FALSE) + } + else if(is.list(data) && !is.data.frame(data)){ + if (sum(sapply(data, is.data.frame)) != length(data)){ + stop("Please supply a list of data frames that have been imputed.", call. = FALSE) + } + } + if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } + else if(!is.character(exposure) || length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } + else if(!is.character(outcome) || length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } if (missing(formulas)){ stop("Please supply a list of balancing formulas.", call. = FALSE) } + else if(!is.list(formulas) | is.data.frame(formulas)){ + stop("Please provide a list of formulas for each exposure time point", call. = FALSE) + } if(!is.character(method)){ stop("Please provide as a character string a weights method from this list: 'ps', 'glm', 'gbm', 'bart', 'super', 'cbps'.", call. = FALSE) } - if(! method %in% c("ps", "glm", "gbm", "bart", "super", "cbps")){ + else if(! method %in% c("ps", "glm", "gbm", "bart", "super", "cbps")){ stop("Please provide a weights method from this list: 'ps', 'glm', 'gbm', 'bart', 'super', 'cbps'.", call. = FALSE) } - if (!inherits(data, "mids") && !is.data.frame(data) && !is.list(data)) { - stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", - call. = FALSE) + + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) } - if(!is.list(formulas)) { - stop("Please provide a list of formulas for each exposure time point", call. = FALSE) + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) + } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) } + + weights_method <- method form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[", 1) @@ -125,9 +155,12 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } if (read_in_from_file == "yes") { + tryCatch({ - weights <- readRDS(paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_", - weights_method, "_fit.rds")) + # weights <- readRDS(paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_", + # weights_method, "_fit.rds")) + weights <- readRDS(sprintf("%s/weights/%s-%s_%s_%s_fit.rds", + home_dir, exposure, outcome, form_name, weights_method)) if (verbose){ message("Reading in balancing weights from the local folder.") @@ -147,32 +180,33 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = # Helper function to calculate weights calculate_weights <- function(data, form, weights_method, SL.library) { - if (weights_method == "super") { - fit <- weightitMSM(form, - data = data, - method = weights_method, - stabilize = TRUE, - density = "dt_2", #do we want this? - use.kernel = TRUE, - include.obj = TRUE, - SL.library = SL.library, - over = FALSE) + if(weights_method == "super"){ + fit <- WeightIt::weightitMSM(form, + data = data, + method = weights_method, + stabilize = TRUE, + density = "dt_2", #do we want this? + use.kernel = TRUE, + include.obj = TRUE, + SL.library = SL.library, + over = FALSE) } - else { - fit <- weightitMSM(form, - data = data, - method = weights_method, - stabilize = TRUE, - density = "dt_2", #do we want this? - use.kernel = TRUE, - include.obj = TRUE, - over = FALSE) + else{ + fit <- WeightIt::weightitMSM(form, + data = data, + method = weights_method, + stabilize = TRUE, + density = "dt_2", #do we want this? + use.kernel = TRUE, + include.obj = TRUE, + over = FALSE) } fit } if (inherits(data, "mids")) { + # Cycling through imputed datasets weights <- lapply(seq_len(data$m), function(i) { d <- as.data.frame(mice::complete(data, i)) @@ -190,31 +224,48 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = d$weights <- fit$weights if (verbose){ - cat(paste0("For imputation ", i, " and the ", weights_method, " weighting method, the median weight value is ", - round(median(fit$weights), 2) ," (SD= ", round(sd(fit$weights), 2), "; range= ", round(min(fit$weights), 2), "-", - round(max(fit$weights), 2), ")."), "\n") + + cat(sprintf("For imputation %s and the %s weighting method, the median weight value is %s + (SD= %s; range= %s-%s ). \n", + i, + weights_method, + round(median(fit$weights), 2), + round(sd(fit$weights), 2), + round(min(fit$weights), 2), + round(max(fit$weights), 2))) + cat('\n') } if(save.out){ # Save weights merged with ID variable - write.csv(x = d, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", form_name, - "_", weights_method, "_", i, ".csv")) + write.csv(x = d, file = + # paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", form_name, + # "_", weights_method, "_", i, ".csv")) + sprintf("%s/weights/values/%s-%s_%s_%s_%s.csv", + home_dir, exposure, outcome, form_name, weights_method, i)) } # Writes image of the histogram of weights to assess heavy tails - p <- ggplot2::ggplot(data = as.data.frame(fit$weight), aes(x = fit$weight)) + + p <- ggplot2::ggplot(data = as.data.frame(fit$weight), ggplot2::aes(x = fit$weight)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + - ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) + ggplot2::ggtitle( + # paste0("Distribution of ", weights_method, " weights")) + sprintf("Distribution of %s weights", + weights_method)) if(verbose){ print(p) } if(save.out){ - ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, "_", form_name, - "_", weights_method, "_", i, ".png"), plot = p, height = 8, width = 14) + ggsave( + # paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, "_", form_name, + # "_", weights_method, "_", i, ".png"), + sprintf("%s/weights/histograms/Hist_%s-%s_%s_%s_%s.png", + home_dir, exposure, outcome, form_name, weights_method, i), + plot = p, height = 8, width = 14) } fit @@ -227,7 +278,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } } - else if(inherits(data, "list")){ + else if(is.list(data) && !is.data.frame(data)){ # Cycling through list of imputed datasets weights <- lapply(seq_len(length(data)), function(i) { d <- data[[i]] @@ -245,21 +296,33 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = d$weights <- fit$weights if (verbose){ - cat(paste0("For imputation", i, " and ", weights_method, - ", weighting method, the median weight value is ", round(median(fit$weights), 2) , - " (SD= ", round(sd(fit$weights), 2), "; range= ", round(min(fit$weights), 2), "-", - round(max(fit$weights), 2), ")."), "\n") + # cat(paste0("For imputation", i, " and ", weights_method, + # ", weighting method, the median weight value is ", round(median(fit$weights), 2) , + # " (SD= ", round(sd(fit$weights), 2), "; range= ", round(min(fit$weights), 2), "-", + # round(max(fit$weights), 2), ")."), "\n") + cat(sprintf("For imputation %s and the %s weighting method, the median weight value is %s + (SD= %s; range= %s-%s ). \n", + i, + weights_method, + round(median(fit$weights), 2), + round(sd(fit$weights), 2), + round(min(fit$weights), 2), + round(max(fit$weights), 2))) cat('\n') } if(save.out){ # Save weights merged with ID variable - write.csv(x = d, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, - "_", form_name, "_", weights_method, "_", i, ".csv")) + write.csv(x = d, + file = + # paste0(home_dir, "/weights/values/", exposure, "-", outcome, + # "_", form_name, "_", weights_method, "_", i, ".csv")) + sprintf("%s/weights/values/%s-%s_%s_%s_%s.csv", + home_dir, exposure, outcome, form_name, weights_method, i)) } # Writes image of the histogram of weights to assess heavy tails - p <- ggplot2::ggplot(data = as.data.frame(fit$weight), aes(x = fit$weight)) + + p <- ggplot2::ggplot(data = as.data.frame(fit$weight), ggplot2::aes(x = fit$weight)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) @@ -269,9 +332,12 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } if(save.out){ - ggplot2::ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, - "_", form_name, "_", weights_method, "_", i, ".png"), plot = p, - height = 8, width = 14) + ggplot2::ggsave( + # paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, + # "_", form_name, "_", weights_method, "_", i, ".png"), + sprintf("%s/weights/histograms/Hist_%s-%s_%s_%s_%s.png", + home_dir, exposure, outcome, form_name, weights_method, i), + plot = p, height = 8, width = 14) } fit @@ -301,9 +367,17 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = data$weights <- weights[[1]]$weights if (verbose){ - cat(paste0("For the ", weights_method, " weighting method, the median weight value is ", - round(median(data$weights), 2) , " (SD = ", round(sd(data$weights), 2), "; range = ", - round(min(data$weights), 2), "-", round(max(data$weights), 2), ")."), "\n") + # cat(paste0("For the ", weights_method, " weighting method, the median weight value is ", + # round(median(data$weights), 2) , " (SD = ", round(sd(data$weights), 2), "; range = ", + # round(min(data$weights), 2), "-", round(max(data$weights), 2), ")."), "\n") + cat(sprintf("For the %s weighting method, the median weight value is %s + (SD = %s; range = %s-%s. \n", + weights_method, + round(median(data$weights), 2), + round(sd(data$weights), 2), + round(min(data$weights), 2), + round(max(data$weights)))) + cat('\n') } @@ -311,23 +385,34 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = if(save.out){ # Save weights merged with ID variable - write.csv(x = data, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, - "_", form_name, "_", weights_method, ".csv")) + write.csv(x = data, file = + # paste0(home_dir, "/weights/values/", exposure, "-", outcome, + # "_", form_name, "_", weights_method, ".csv")) + sprintf("%s/weights/values/%s-%s_%s_%s.csv", + home_dir, exposure, outcome, form_name, weights_method)) } # Writes image of the histogram of weights to assess heavy tails - p <- ggplot2::ggplot(data = as.data.frame(weights[[1]]$weights), aes(x = weights[[1]]$weights)) + + p <- ggplot2::ggplot(data = as.data.frame(weights[[1]]$weights), ggplot2::aes(x = weights[[1]]$weights)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + - ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) + ggplot2::ggtitle( + # paste0("Distribution of ", weights_method, " weights")) + sprintf("Distribution of %s weights", + weights_method)) if(verbose){ print(p) } if(save.out){ - ggplot2::ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, - "_", form_name, "_", weights_method, ".png"), plot = p, height = 8, width = 14) + ggplot2::ggsave( + # paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, + # "_", form_name, "_", weights_method, ".png"), + sprintf("%s/weights/histograms/Hist_%s-%s_%s_%s.png", + home_dir, exposure, outcome, form_name, weights_method), + plot = p, height = 8, width = 14) + if (verbose){ cat("Weights have now been saved into the 'weights/values/' folder.") cat("\n") @@ -337,10 +422,14 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } if(save.out){ - saveRDS(weights, file = paste0(home_dir, "/weights/", exposure, "-", outcome, - "_", form_name, "_", weights_method, "_fit.rds")) + saveRDS(weights, file = + # paste0(home_dir, "/weights/", exposure, "-", outcome, + # "_", form_name, "_", weights_method, "_fit.rds")) + sprintf("%s/weights/%s-%s_%s_%s_fit.rds", + home_dir, exposure, outcome, form_name, weights_method)) if (verbose){ - cat("Weights models have been saved as an .rds object in the 'weights' folder.") + cat("\n") + cat("Weights models have been saved as an .rds object in the 'weights' folder.", "\n") } } diff --git a/R/eval_hist.R b/R/eval_hist.R index 95f4f42b..f6fed511 100644 --- a/R/eval_hist.R +++ b/R/eval_hist.R @@ -6,8 +6,6 @@ #' @param data data in wide format as: a data frame, list of imputed #' data frames, or mids object #' @param exposure name of exposure variable -#' @param tv_confounders list of time-varying confounders with ".timepoint" -#' suffix #' @param epochs (optional) data frame of exposure epoch labels and values #' @param time_pts list of integers at which weights will be #' created/assessed that correspond to time points when exposure was measured @@ -58,16 +56,13 @@ #' ref = "l-l-l", #' comps = "h-h-h") -eval_hist <- function(data, exposure, tv_confounders, epochs = NULL, time_pts, hi_lo_cut = NULL, ref = NA, comps = NULL, verbose = TRUE){ +eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, ref = NA, comps = NULL, verbose = TRUE){ - exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', time_pts[1])], "numeric"), "continuous", "binary") + # exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', time_pts[1])], "numeric"), "continuous", "binary") + exposure_type <- if (inherits(data[, paste0(exposure, '.', time_pts[1])], "numeric")) "continuous" else "binary" - time_varying_wide <- tv_confounders - time_varying_wide <- sort(time_varying_wide) - time_varying_wide <- c("ID", time_varying_wide) data_wide <- data - #new will always have exposure main effects (ether exposure time points or epochs) # Lists out exposure-epoch combos if( is.null(epochs)){ #making epochs time pts if not specified by user @@ -76,10 +71,10 @@ eval_hist <- function(data, exposure, tv_confounders, epochs = NULL, time_pts, h new <- data[, c("ID", paste(exposure, time_pts, sep = "."))] } else{ - #new will have cols for epochs new <- data.frame(ID = data_wide[, "ID"]) colnames(new) <- "ID" + # Averages exposure across time points that constitute the exposure epochs (e.g., infancy = 6 & 15) for (e in seq_len(nrow(epochs))) { epoch <- epochs[e, 1] @@ -88,40 +83,43 @@ eval_hist <- function(data, exposure, tv_confounders, epochs = NULL, time_pts, h # Finds data from each time point in each epoch, horizontally aligns all exposure values within the epoch for averaging for (l in seq_len(length(as.numeric(unlist(epochs[e, 2]))))) { level <- as.numeric(unlist(epochs[e, 2]))[l] - z <- data_wide[, names(data_wide)[grepl(exposure, names(data_wide))]] #finds exposure vars - z <- as.numeric(as.character(unlist(z[, sapply(strsplit(names(z), "\\."), "[", 2) == as.character(level)]))) + z <- as.data.frame(data_wide[, names(data_wide)[grepl(exposure, names(data_wide))]]) #finds exposure vars + cols <- colnames(z)[as.logical(sapply(strsplit(names(z), "\\."), "[", 2) == as.character(level))] + cols <- cols[!is.na(cols)] + z <- as.numeric(as.character(unlist(z[, cols]))) temp <- cbind(temp, z) } - new <- new %>% - dplyr::mutate(!!new_var := rowMeans(temp, na.rm = TRUE)) + x <- as.data.frame(rowMeans(temp, na.rm = TRUE)) + colnames(x) <- c(new_var) + new <- cbind(new, x) } } if(verbose){ cat("Summary of Exposure Main Effects:", "\n") - print(psych::describe(new %>% select(-c("ID")), fast = TRUE)) + summary(new[, !colnames(new) %in% "ID"]) cat("\n") - } - tot_hist <- apply(gtools::permutations(2, nrow(epochs), c("l", "h"), repeats.allowed = TRUE), 1, paste, sep = "", collapse = "-") # Assigning history (e.g., h-h-h) based on user-specified hi/lo cutoffs - if( !is.na(ref) & !is.null(comps)){ + if( !is.na(ref) && !is.null(comps)){ tot_hist <- tot_hist[tot_hist %in% c(ref, comps)] } - epochs$epochs <- as.character(epochs$epochs) - if (exposure_type == "continuous"){ + if(is.null(hi_lo_cut)){ + # use median as hi/lo split (default) new$history <- lapply(seq_len(nrow(new)), function(x) { + paste(lapply(seq_len(nrow(epochs)), function(y) { + if (is.na(new[x, y + 1])) { return(NA) } @@ -133,9 +131,11 @@ eval_hist <- function(data, exposure, tv_confounders, epochs = NULL, time_pts, h } }), collapse = "-") }) + } else{ #user-supplied values if (length(hi_lo_cut) == 2){ + hi_cutoff <- hi_lo_cut[1] lo_cutoff <- hi_lo_cut[2] @@ -150,12 +150,15 @@ eval_hist <- function(data, exposure, tv_confounders, epochs = NULL, time_pts, h if (hi_lo_cut > 1 || hi_lo_cut < 0) { stop('Please select a hi_lo cutoff value between 0 and 1', call. = FALSE) } + hi_cutoff <- hi_lo_cut lo_cutoff <- hi_lo_cut } new$history <- lapply(seq_len(nrow(new)), function(x) { + paste(lapply(seq_len(nrow(epochs)), function(y) { + if (is.na(new[x, y + 1])) { return(NA) } @@ -173,8 +176,11 @@ eval_hist <- function(data, exposure, tv_confounders, epochs = NULL, time_pts, h } else if (exposure_type == "binary"){ + new$history <- lapply(seq_len(nrow(new)), function(x) { + paste(lapply(seq_len(nrow(epochs)), function(y) { + if (is.na(new[x, y + 1])) { return(NA) } @@ -184,18 +190,20 @@ eval_hist <- function(data, exposure, tv_confounders, epochs = NULL, time_pts, h if (new[x, y + 1] == 0) { return("l") } + }), collapse = "-") + }) } # Summarizing n's by history - his_summ <- new %>% - dplyr::group_by(history) %>% - dplyr::summarize(n = dplyr::n()) + new$history <- unlist(new$history) + his_summ <- aggregate( ID ~ history, data = new, + FUN = function(x) n = length(x)) + colnames(his_summ) <- c("history", "n") - if( !is.na(ref) & !is.null(comps)){ - his_summ <- his_summ %>% - dplyr::filter(history %in% c(ref, comps)) + if( !is.na(ref) && !is.null(comps)){ + his_sum <- his_summ[his_summ$history %in% c(ref, comps), ] } his_summ <- his_summ[! grepl("NA", his_summ$history),] @@ -203,32 +211,59 @@ eval_hist <- function(data, exposure, tv_confounders, epochs = NULL, time_pts, h if(verbose){ if(!is.null(hi_lo_cut)){ - cat(paste0("USER ALERT: Out of the total of ", nrow(data_wide), " individuals in the sample, below is the distribution of the ", sum(his_summ$n), " (", - round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), "%) that fall into ", nrow(his_summ), " out of the ", length(tot_hist), - " the total user-defined exposure histories created from ", - hi_lo_cut[2] * 100, "th and ", hi_lo_cut[1] * 100, "th percentile values for low and high levels of exposure ", exposure, - ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") + # cat(paste0("USER ALERT: Out of the total of ", nrow(data_wide), " individuals in the sample, below is the distribution of the ", sum(his_summ$n), " (", + # round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), "%) that fall into ", nrow(his_summ), " out of the ", length(tot_hist), + # " the total user-defined exposure histories created from ", + # hi_lo_cut[2] * 100, "th and ", hi_lo_cut[1] * 100, "th percentile values for low and high levels of exposure ", exposure, + # ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") + + cat(sprintf("USER ALERT: Out of the total of %s individuals in the sample, below is the distribution of the %s (%s%%) individuals + that fall into %s out of the %s the total user-defined exposure histories created from + %sth and %sth percentile values for low and high levels of exposure %s, respectively, across %s. \n", + nrow(data_wide), + sum(his_summ$n), + round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), + nrow(his_summ), + length(tot_hist), + hi_lo_cut[2] * 100, + hi_lo_cut[1] * 100, + exposure, + paste(epochs$epochs, collapse = ", "))) + } else{ - cat(paste0("USER ALERT: Out of the total of ", nrow(data_wide), " individuals in the sample, below is the distribution of the ", sum(his_summ$n), " (", - round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), "%) that fall into ", nrow(his_summ), " out of the ", length(tot_hist), - " the total user-defined exposure histories created from median split values for low and high levels of exposure ", exposure, - ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") + # cat(paste0("USER ALERT: Out of the total of ", nrow(data_wide), " individuals in the sample, below is the distribution of the ", sum(his_summ$n), " (", + # round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), "%) that fall into ", nrow(his_summ), " out of the ", length(tot_hist), + # " the total user-defined exposure histories created from median split values for low and high levels of exposure ", exposure, + # ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") + + cat(sprintf("USER ALERT: Out of the total of %s individuals in the sample, below is the distribution of the %s (%s%%) individuals that fall into %s + out of the %s total user-defined exposure histories created from median split values for low and high levels of exposure + %s, respectively, across %s. \n", + nrow(data_wide), + sum(his_summ$n), + round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), + nrow(his_summ), + length(tot_hist), + exposure, + paste(epochs$epochs, collapse = ", "))) } - cat("USER ALERT: Please inspect the distribution of the sample across the following exposure histories and ensure there is sufficient spread to avoid extrapolation and low precision:", "\n") + cat("USER ALERT: Please inspect the distribution of the sample across the following exposure histories and ensure there is + sufficient spread to avoid extrapolation and low precision:", "\n") if (nrow(his_summ) != length(tot_hist)) { - cat(paste0("USER ALERT: There are no individuals in your sample that fall into ", - paste(tot_hist[!tot_hist %in% his_summ$history], collapse = " & "), - " exposure history/histories. You may wish to consider different high/low cutoffs (for continuous exposures), alternative epochs, or choose a different measure to avoid extrapolation."), "\n") + + warning(sprintf("USER ALERT: There are no individuals in your sample that fall into %s exposure history/histories. + You may wish to consider different high/low cutoffs (for continuous exposures), alternative epochs, or choose a different measure to avoid extrapolation.\n", + paste(tot_hist[!tot_hist %in% his_summ$history], collapse = " & ")), call. = FALSE) cat("\n") } cat("\n") - cat(knitr::kable(his_summ, caption = paste0("Summary of user-specified exposure ", exposure, - " histories based on exposure main effects ", paste(epochs$epochs, collapse = ", "), - " containing time points ", paste(epochs$values, collapse = ", "), ":"), + cat(knitr::kable(his_summ, caption = sprintf("Summary of user-specified exposure %s histories based on exposure main effects %s + containing time points %s:", + exposure, paste(epochs$epochs, collapse = ", "), paste(epochs$values, collapse = ", ")), format = 'pipe', row.names = F), sep = "\n") } diff --git a/R/fitModel.R b/R/fitModel.R index e6d9c9bc..bf22d5d2 100644 --- a/R/fitModel.R +++ b/R/fitModel.R @@ -3,9 +3,9 @@ #' Fits weighted marginal outcome model as a generalized linear model of the #' user's choosing, relating exposure main effects to outcome using IPTW #' weights. -#' @importFrom dplyr mutate select #' @seealso {[survey::svyglm()] for more on family/link specifications, #' } +#' @importFrom survey svyglm #' @param home_dir path to home directory (required if 'save.out' = TRUE) #' @param data data in wide format as: a data frame, list of imputed data #' frames, or mids object @@ -110,6 +110,9 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } @@ -119,51 +122,103 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } + else if (!inherits(data, "mids") && !is.data.frame(data) && !is.list(data)) { + stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", call. = FALSE) + } + else if(is.list(data) && !is.data.frame(data)){ + if (sum(sapply(data, is.data.frame)) != length(data)){ + stop("Please supply a list of data frames that have been imputed.", call. = FALSE) + } + } + if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } + else if(!is.character(exposure) || length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } + else if(!is.character(outcome) || length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } + if (missing(weights)){ stop("Please supply a list of IPTW weights.", call. = FALSE) } + else if (!is.list(weights) || is.data.frame(weights)){ + stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + } + else if(is.list(weights) && !is.data.frame(weights)){ + if (sum(sapply(weights, function(x) { + inherits(x, "weightitMSM")})) != length(weights)){ + stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + } + } + if (missing(exposure_time_pts)){ stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } + else if(!is.numeric(exposure_time_pts)){ + stop("Please supply a list of exposure time points as integers.", call. = FALSE) + } + if (missing(model)){ stop('Please provide an outcome model selection "m" from 0-3 (e.g., "m1")', call. = FALSE) } - if (!inherits(data, "mids") & !is.data.frame(data) & !inherits(data, "list")) { - stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", call. = FALSE) - } if (!is.character(model)){ stop('Please provide as a character string a valid model "m" from 0-3 (e.g., "m1")', call. = FALSE) + else if (!is.character(model) || length(model) != 1){ + stop('Please provide a single outcome model selection "m" from 0-3 (e.g., "m1")', call. = FALSE) } if (!(model %in% c("m0", "m1", "m2", "m3"))) { stop('Please provide a valid model "m" from 0-3 (e.g., "m1")', call. = FALSE) } - if ((model == "m2" | model == "m3") & (is.na(int_order) | !is.numeric(int_order))){ + if ((model == "m2" | model == "m3") && (is.na(int_order) || !is.numeric(int_order))){ stop("Please provide an integer interaction order if you select a model with interactions.", call. = FALSE) } - if ((model == "m1" | model == "m3") & (is.null(covariates) | !is.character(covariates))){ + if ((model == "m1" | model == "m3") && (is.null(covariates) || !is.character(covariates))){ stop("Please provide a list of covariates as characters if you select a covariate model.", call. = FALSE) } if(!inherits(family, "function")){ stop("Please provide a valid family in the form of a function (without quotations).", call. = FALSE) } + if(length(family) != 1){ + stop("Please provide a single valid family in the form of a function (without quotations).", call. = FALSE) + } + if(!inherits(link, "character")){ stop("Please provide as a character a valid link function.", call. = FALSE) } + else if(length(link) != 1){ + stop("Please provide as a character a valid link function.", call. = FALSE) + } + if (!is.null(covariates)){ + if(!is.character(covariates)){ + stop("Please provide a list of character strings for covariates.", call. = FALSE) + } if (sum(as.numeric(sapply(strsplit(covariates, "\\."), "[", 2)) > exposure_time_pts[1], na.rm = T) > 0){ warning("Please only include covariates that are time invariant or measured at the first exposure time point.") } } - if (!inherits(weights, "list")){ - stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) + } + + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) + } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) } @@ -179,10 +234,13 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco # Lists out exposure-epoch combos if( is.null(epochs)){ #making epochs time pts if not specified by user + epochs <- data.frame(epochs = as.character(exposure_time_pts), values = exposure_time_pts) + } else{ - if( !is.data.frame(epochs) | ncol(epochs) != 2 | sum(colnames(epochs) == c("epochs", "values")) != ncol(epochs)){ + + if( !is.data.frame(epochs) || ncol(epochs) != 2 || sum(colnames(epochs) == c("epochs", "values")) != ncol(epochs)){ stop("If you supply epochs, please provide a dataframe with two columns of epochs and values.", call. = FALSE) } @@ -191,10 +249,8 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco } } - exp_epochs <- apply(expand.grid(exposure, as.character(epochs[, 1])), 1, paste, sep = "", collapse = ".") - #getting null comparison if (model == "m0") {n <- "int"} else if (model == "m1") {n <- "covs"} @@ -210,20 +266,25 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco # } if (inherits(data, "mids")){ #imputed dataset + fits <- lapply(seq_len(data$m), function(y) { - d <- complete(data, y) + + d <- mice::complete(data, y) d$weights <- NULL d$weights <- weights[[y]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model, family, covariates, verbose) + }) fits.null <- lapply(seq_len(data$m), function(y) { - d <- complete(data, y) + + d <- mice::complete(data, y) d$weights <- NULL d$weights <- weights[[y]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model = n, family, covariates, verbose) + }) if (verbose){ @@ -238,19 +299,23 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco } - else if (inherits(data, "list")){ #imputed dataset + else if (is.list(data) && !is.data.frame(data)){ #imputed dataset fits <- lapply(seq_len(length(data)), function(y) { + d <- data[[y]] d$weights <- NULL d$weights <- weights[[y]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model, family, covariates, verbose) + }) fits.null <- lapply(seq_len(length(data)), function(y) { + d <- data[[y]] d$weights <- NULL d$weights <- weights[[y]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model = n, family, covariates, verbose) + }) if (verbose){ @@ -266,17 +331,21 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco else if (is.data.frame(data)){ #df fits <- lapply(1, function(y) { + d <- data d$weights <- NULL d$weights <- weights[["0"]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model, family, covariates, verbose) + }) fits.null <- lapply(1, function(y) { + d <- data d$weights <- NULL d$weights <- weights[["0"]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model = n, family, covariates, verbose) + }) if (verbose){ @@ -293,23 +362,34 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco - if (inherits(data, "mids") | inherits(data, "list")){ + + if (inherits(data, "mids") || (is.list(data)) && !is.data.frame(data)){ names(fits) <- seq_len(length(fits)) if (verbose){ - cat(paste0("The marginal model, ", model, ", run for each imputed dataset is summarized below:"), "\n") + + # cat(paste0("The marginal model, ", model, ", run for each imputed dataset is summarized below:"), "\n") + sprintf("The marginal model, %s run for each imputed dataset is summarized below: \n", + model) + print(suppressWarnings(jtools::export_summs( - fits, statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), + fits, + statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), model.names = c(paste0("Imp.", seq_len(length(fits)))) ))) } if(save.out){ suppressWarnings(jtools::export_summs( - fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), + fits, + to.file = "docx", + statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), model.names = c(paste0("Imp.", seq_len(length(fits)))), - file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")) + file.name = file.path(home_dir, "models", + # paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")) + sprintf("%s-%s_%s_table_mod_ev.docx", + exposure, outcome, model)) )) } @@ -319,22 +399,34 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if (verbose){ require(survey) cat(paste0("The marginal model, ", model, ", is summarized below:"), "\n") - print(suppressWarnings(jtools::export_summs(fits, statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared")))) + print(suppressWarnings( + jtools::export_summs( + fits, + statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared")))) } if (save.out){ - requireNamespace(officer) #is there another way to do this? required for writing to word - requireNamespace(flextable) # " " - suppressWarnings(jtools::export_summs(fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), - file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, - "_table_mod_ev.docx")))) + # requireNamespace(officer) #is there another way to do this? required for writing to word + # requireNamespace(flextable) # " " + suppressWarnings( + jtools::export_summs(fits, + to.file = "docx", + statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), + file.name = file.path(home_dir, "models", + # paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")))) + sprintf("%s-%s_%s_table_mod_ev.docx", + exposure, outcome, model)))) } } if(save.out){ - saveRDS(fits, file = file.path(home_dir, "/models/", paste0(exposure, "-", outcome, "_", model, "_model.rds"))) + saveRDS(fits, + file = file.path(home_dir, "/models/", + # paste0(exposure, "-", outcome, "_", model, "_model.rds"))) + sprintf("%s-%s_%s_model.rds", + exposure, outcome, model))) cat("\n") if (verbose){ diff --git a/R/getModel.R b/R/getModel.R index 96a0f358..1fa1cee3 100644 --- a/R/getModel.R +++ b/R/getModel.R @@ -87,11 +87,15 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs for (l in seq_len(length(as.numeric(unlist(epochs[e, 2]))))){ level <- as.numeric(unlist(epochs[e, 2]))[l] z <- d[, names(d)[grepl(exposure, names(d))]] #finds exposure vars - z <- as.numeric(as.character(unlist(z[, sapply(strsplit(names(z), "\\."), "[", 2) == as.character(level)]))) + cols <- colnames(z)[as.logical(sapply(strsplit(names(z), "\\."), "[", 2) == as.character(level))] + cols <- cols[!is.na(cols)] + z <- as.numeric(as.character(unlist(z[, cols]))) temp <- cbind(temp, z) } #adds a new variable of the exposure averaged within epoch - d <- d %>% dplyr::mutate(!!new_var := rowMeans(temp, na.rm=T)) + x <- as.data.frame(rowMeans(temp, na.rm = TRUE)) + colnames(x) <- c(new_var) + d <- cbind(d, x) d[, new_var] <- as.numeric(d[, new_var]) } } @@ -122,12 +126,15 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs }), collapse = " + " ) + #create interactions in data for (x in seq_len(length(unlist(strsplit(interactions, "\\+"))))) { name <- gsub(" ", "", unlist(strsplit(interactions, "\\+"))[x]) if (! name %in% colnames(d)){ temp <- d[, c(gsub(" ", "", as.character(unlist(strsplit(unlist(strsplit(interactions, "\\+"))[x], "\\:"))))) ] - d <- d %>% dplyr::mutate(!! name := matrixStats::rowProds(as.matrix(temp), na.rm = T)) + new <- matrixStats::rowProds(as.matrix(temp), na.rm = T) + names(new) <- name + d <- cbind(d, new) } } } @@ -147,19 +154,25 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs # Fitting intercept-only model if (model == "int"){ fi <- paste(outcome, "~ 1") - mi <- survey::svyglm(as.formula(fi), family = fam, design = s) + mi <- survey::svyglm(as.formula(fi), + family = fam, + design = s) return(mi) } else if (model == "covs"){ fc <- paste(outcome, "~", covariate_list) - mc <- survey::svyglm(as.formula(fc), family = fam, design = s) + mc <- survey::svyglm(as.formula(fc), + family = fam, + design = s) return(mc) } # Fitting baseline model w/ main effects only (m0) for all models f0 <- paste(outcome, "~", paste0(exp_epochs, sep = "", collapse = " + ")) - m0 <- survey::svyglm(as.formula(f0), family = fam, design = s) + m0 <- survey::svyglm(as.formula(f0), + family = fam, + design = s) if (model == "m0") { return(m0) # Save model @@ -168,7 +181,9 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs # Baseline + sig covar model OR baseline + sig covar + int model if (model == "m1" | model == "m3") { f1 <- paste(f0, "+", covariate_list) # Baseline + covariate model - m1 <- survey::svyglm(as.formula(f1), family = fam, design = s) + m1 <- survey::svyglm(as.formula(f1), + family = fam, + design = s) # Baseline + imbalanced covars if (model == "m1") { @@ -183,7 +198,9 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs # Baseline + interactions if (model == "m2") { # Fitting m2 - m2 <- survey::svyglm(as.formula(f2), family = fam, design = s) + m2 <- survey::svyglm(as.formula(f2), + family = fam, + design = s) # Baseline + interactions return(m2) } @@ -193,7 +210,9 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs # Fitting m3 f3 <- paste0(f1, "+", paste(interactions, sep = "", collapse = " + ")) f3 <- as.formula(f3) - m3 <- survey::svyglm(f3, family = fam, design = s) + m3 <- survey::svyglm(f3, + family = fam, + design = s) # Baseline + covars+ interactions return(m3) diff --git a/R/make_love_plot.R b/R/make_love_plot.R index 3c6f7457..27c597fd 100644 --- a/R/make_love_plot.R +++ b/R/make_love_plot.R @@ -1,6 +1,6 @@ #' Create love plots showing balancing statistics #' -#' @param home_dir path to home directory +#' @param home_dir path to home directory (required if save.out = TRUE) #' @param folder folder path for saving #' @param exposure name of exposure variable #' @param exposure_time_pt exposure time point integer @@ -51,7 +51,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "weighted", #' weights = w, #' formulas = f, @@ -88,26 +87,27 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur stat_var <- colnames(balance_stats)[grepl("_bal", colnames(balance_stats))] colnames(balance_stats)[colnames(balance_stats) == stat_var] <- "avg_bal" - balance_stats <- balance_stats %>% dplyr::arrange(avg_bal) + # balance_stats <- balance_stats %>% dplyr::arrange(avg_bal) + balance_stats <- balance_stats[order(balance_stats$avg_bal), , drop = FALSE] - x_lab <- ifelse(exposure_type == "continuous", "Correlation with Exposure", "Standardized Mean Difference Between Exposures") + x_lab <- if(exposure_type == "continuous") "Correlation with Exposure" else "Standardized Mean Difference Between Exposures" labels <- ifelse(balance_stats$balanced == 0, balance_stats$covariate, "") - min_val <- ifelse(min(balance_stats[, "avg_bal"]) < 0, min(balance_stats[, "avg_bal"]) - 0.05, min(balance_thresh) - 0.05) + min_val <- if(min(balance_stats[, "avg_bal"]) < 0) min(balance_stats[, "avg_bal"]) - 0.05 else min(balance_thresh) - 0.05 if (min_val > -(max(balance_thresh))){ - min_val = -(max(balance_thresh)) - 0.05 #to make sure user-supplied balance thresh is on the figure + min_val <- -(max(balance_thresh)) - 0.05 #to make sure user-supplied balance thresh is on the figure } - max_val <- ifelse(max(balance_stats[, "avg_bal"]) > 0, max(balance_stats[, "avg_bal"]) + 0.05, max(balance_thresh) + 0.05) + max_val <- if(max(balance_stats[, "avg_bal"]) > 0) max(balance_stats[, "avg_bal"]) + 0.05 else max(balance_thresh) + 0.05 if (max_val < max(balance_thresh)){ - max_val = max(balance_thresh) + 0.05 #to make sure user-supplied balance thresh is on the figure + max_val <- max(balance_thresh) + 0.05 #to make sure user-supplied balance thresh is on the figure } # Make love plot per exposure time point - lp <- ggplot2::ggplot(aes(x = avg_bal, y = reorder(as.factor(covariate), avg_bal)), data = balance_stats) + - ggplot2::geom_point(aes(y = reorder(as.factor(covariate), avg_bal), x = avg_bal, fill = "white", alpha = 1)) + - ggplot2::geom_text(aes(label = labels, hjust = -0.2, vjust = 0.2), size = 1.5, color = "red") + + lp <- ggplot2::ggplot(ggplot2::aes(x = avg_bal, y = reorder(as.factor(covariate), avg_bal)), data = balance_stats) + + ggplot2::geom_point(ggplot2::aes(y = reorder(as.factor(covariate), avg_bal), x = avg_bal, fill = "white", alpha = 1)) + + ggplot2::geom_text(ggplot2::aes(label = labels, hjust = -0.2, vjust = 0.2), size = 1.5, color = "red") + ggplot2::xlab(x_lab) + ggplot2::ylab("Covariate") + ggplot2::xlim(min_val, max_val) + @@ -143,18 +143,25 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur lp <- lp + ggplot2::ggtitle(paste0(exposure, " (t = ", exposure_time_pt, ") Balance for Imputation ", k)) if(save.out){ - suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", folder, "plots/", - form_name, "_imp_", k, "_", exposure, "_", exposure_time_pt, "_", - weights_method, "_summary_balance_plot.jpeg"), width = 6, height = 8)) + suppressMessages(ggplot2::ggsave(lp, filename = + # paste0(home_dir, "/balance/", folder, "plots/", + # form_name, "_imp_", k, "_", exposure, "_", exposure_time_pt, "_", + # weights_method, "_summary_balance_plot.jpeg") + sprintf("%s/balance/%splots/%s_imp_%s_%s_%s_%s_summary_balance_plot.jpeg", + home_dir, folder, form_name, k, exposure, exposure_time_pt, weights_method), + width = 6, height = 8)) } } else { lp <- lp + ggplot2::ggtitle(paste0(exposure, " (t = ", exposure_time_pt, ") Balance")) if(save.out){ - suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", folder, "plots/", form_name, "_", exposure, "_", - exposure_time_pt, "_", weights_method, "_summary_balance_plot.jpeg"), - width = 6, height = 8)) + suppressMessages(ggplot2::ggsave(lp, filename = + # paste0(home_dir, "/balance/", folder, "plots/", form_name, "_", exposure, "_", + # exposure_time_pt, "_", weights_method, "_summary_balance_plot.jpeg"), + sprintf("%s/balance/%splots/%s_%s_%s_%s_summary_balance_plot.jpeg", + home_dir, folder, form_name, exposure, exposure_time_pt, weights_method), + width = 6, height = 8)) } } diff --git a/R/trimWeights.R b/R/trimWeights.R index 79906e9a..d50d8772 100644 --- a/R/trimWeights.R +++ b/R/trimWeights.R @@ -7,7 +7,7 @@ #' @seealso [WeightIt::trim()], #' which #' this function wraps -#' @param home_dir path to home directory +#' @param home_dir path to home directory (required if save.out = TRUE) #' @param exposure name of exposure variable #' @param outcome name of outcome variable with ".timepoint" suffix #' @param weights list of IPTW weights output from createWeights() @@ -63,24 +63,62 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } } + if (missing(exposure)){ + stop("Please supply a single exposure.", call. = FALSE) + } + else if(!is.character(exposure) || length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + if (missing(weights)){ stop("Please supply a list of IPTW weights to trim.", call. = FALSE) } + else if (!is.list(weights) || is.data.frame(weights)){ + stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + } + else if(is.list(weights) && !is.data.frame(weights)){ + if (sum(sapply(weights, function(x) { + inherits(x, "weightitMSM")})) != length(weights)){ + stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + } + } + + if (missing(outcome)){ + stop("Please supply a single outcome.", call. = FALSE) + } + else if(!is.character(outcome) || length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } + if(!is.numeric(quantile)){ - stop('Please sprovide a numeric quantile value between 0 and 1.', call. = FALSE) + stop('Please provide a numeric quantile value between 0 and 1.', call. = FALSE) } else if (quantile > 1 || quantile < 0) { stop('Please provide a quantile value between 0 and 1.', call. = FALSE) } - if (!inherits(weights, "list")){ - stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) + } + + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) + } + if(save.out){ weights_dir <- file.path(home_dir, "weights") @@ -105,25 +143,37 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v t <- WeightIt::trim(w$weights, at = quantile) if (verbose){ - cat('\n') - cat(paste0("For imputation ", x, " and the ", exposure, "-", outcome, " relation, following trimming at the ", - quantile, " quantile, the median weight value is ", round(median(t), 2) , - " (SD= ", round(sd(t), 2), "; range= ", round(min(t), 2), "-", round(max(t), 2), ")."), "\n") - cat('\n') + cat('\n') + # cat(paste0("For imputation ", x, " and the ", exposure, "-", outcome, " relation, following trimming at the ", + # quantile, " quantile, the median weight value is ", round(median(t), 2) , + # " (SD= ", round(sd(t), 2), "; range= ", round(min(t), 2), "-", round(max(t), 2), ")."), "\n") + cat(sprintf("For imputation %s and the %s-%s relation, following trimming at the %s quantile, the median weight value is + %s (SD= %s; range= %s-%s). \n", + x, exposure, outcome, quantile, round(median(t), 2), round(sd(t), 2), round(min(t), 2), round(max(t)))) + cat('\n') } # Save histogram of new weights - p <- ggplot2::ggplot(as.data.frame(t), aes(x = t)) + - ggplot2::geom_histogram(color = 'black', bins = 15) + p <- ggplot2::ggplot(as.data.frame(t), ggplot2::aes(x = t)) + + ggplot2::geom_histogram(color = 'black', bins = 15) + + ggplot2::ggtitle(sprintf("Weights trimmed at the %s th value", + quantile)) if(verbose){ print(p) } if(save.out){ - ggplot2::ggsave(paste("Hist_", exposure, "-", outcome, "_", weights[[x]]$method, "_weights_trim_", - quantile, "_imp_", x, ".png", sep = ""), path = paste0(home_dir, "/weights/histograms/"), plot = p, - height = 8, width = 14) + ggplot2::ggsave( + # paste("Hist_%s-%s_%s_weights_trim_%s_imp_%s.png", sep = ""), + sprintf("Hist_%s-%s_%s_weights_trim_%s_imp_%s.png", + exposure, outcome, weights[[x]]$method, quantile, x), + path = + # paste0(home_dir, "/weights/histograms/"), + sprintf("%s/weights/histograms/", + home_dir), + plot = p, + height = 8, width = 14) } # w$weights <- NA @@ -141,37 +191,55 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v t <- WeightIt::trim(w$weights, at = quantile) if (verbose){ - cat('\n') - cat(paste0("For the ", exposure, "-", outcome, " relation, following trimming at the ", - quantile, " quantile, the median weight value is ", round(median(t), 2) , - " (SD= ", round(sd(t), 2), "; range= ", round(min(t), 2), "-", round(max(t), 2), ")."), "\n") - cat('\n') + cat('\n') + # cat(paste0("For the ", exposure, "-", outcome, " relation, following trimming at the ", + # quantile, " quantile, the median weight value is ", round(median(t), 2) , + # " (SD= ", round(sd(t), 2), "; range= ", round(min(t), 2), "-", round(max(t), 2), ")."), "\n") + cat(sprintf("For the %s-%s relation, following trimming at the %s quantile, the median weight value is + %s (SD= %s; range= %s-%s). \n", + exposure, outcome, quantile, round(median(t), 2), round(sd(t), 2), round(min(t), 2), round(max(t)))) + cat('\n') } # Save histogram of new weights - p <- ggplot2::ggplot(as.data.frame(t), aes(x = t)) + + p <- ggplot2::ggplot(as.data.frame(t), ggplot2::aes(x = t)) + ggplot2::geom_histogram(color = 'black', bins = 15) + - ggtitle(paste0("Weights trimmed at the ", quantile, "th value")) + ggplot2::ggtitle( + # paste0("Weights trimmed at the ", quantile, "th value")) + sprintf("Weights trimmed at the %sth value", quantile)) if(verbose){ print(p) } if(save.out){ - ggplot2::ggsave(paste("Hist_", exposure, "-", outcome,"_",weights[[x]]$method, "_weights_trim_", quantile, ".png", sep = ""), plot = p, - path = paste0(home_dir, "/weights/histograms/"), height = 8, width = 14) + # ggplot2::ggsave(paste("Hist_", exposure, "-", outcome,"_",weights[[x]]$method, "_weights_trim_", quantile, ".png", sep = ""), plot = p, + # path = paste0(home_dir, "/weights/histograms/"), height = 8, width = 14) + + ggplot2::ggsave( + # paste("Hist_%s-%s_%s_weights_trim_%s_imp_%s.png", sep = ""), + sprintf("Hist_%s-%s_%s_weights_trim_%s.png", + exposure, outcome, weights[[x]]$method, quantile), + path = + # paste0(home_dir, "/weights/histograms/"), + sprintf("%s/weights/histograms/", home_dir), + plot = p, + height = 8, width = 14) } - w$weights <- NA - w$weights <- t - w + w$weights <- NA + w$weights <- t + w }) names(trim_weights) <- "0" } if(save.out){ # Save truncated weight data - saveRDS(trim_weights, paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", weights[[1]]$method, "_weights_trim.rds")) + saveRDS(trim_weights, + # paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", weights[[1]]$method, "_weights_trim.rds")) + sprintf("%s/weights/values/%s-%s_%s_weights_trim.rds", + home_dir, exposure, outcome,weights[[1]]$method )) } trim_weights diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 7ae95feb..0f741ab4 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -47,7 +47,7 @@ library(devtools) install_github("istallworthy/devMSMs") library(devMSMs) -#get helper files from github +#get helper files from github. Note: you need to install devMSMs for inspectData to work. install_github("istallworthy/devMSMsHelpers") library(devMSMsHelpers) @@ -56,7 +56,7 @@ library(devMSMsHelpers) #prior to re-installing using the code above. Sorry, this is annoying! #conducting package checks & tests from documentation --just for IS to run; comment out when testing the workflow -# devtools::check() + devtools::check() ``` @@ -67,14 +67,19 @@ library(devMSMsHelpers) set.seed(1234) +#required if you wish to use save.out = TRUE in the functions home_dir <- '/Users/isabella/Library/CloudStorage/Box-Box/BSL General/MSMs/testing/isa' #no / after +#required exposure <- "ESETA1" +#required exposure_time_pts <- c(6, 15, 24, 35, 58) +#required outcome <- "EF_avg_perc.58" +#these are now optional tv_confounders <- c("SAAmylase.6","SAAmylase.15", "SAAmylase.24", "MDI.6", "MDI.15", # "RHasSO.6", "RHasSO.15", "RHasSO.24", #removed for low variability @@ -93,6 +98,7 @@ tv_confounders <- c("SAAmylase.6","SAAmylase.15", "SAAmylase.24", "StrDif_Tot.35", "StrDif_Tot.58", "EF_avg_perc.35", "EF_avg_perc.58") +#required ti_confounders <- c("state", "BioDadInHH2", "PmAge2", "PmBlac2", "TcBlac2", "PmMrSt2", "PmEd2", "KFASTScr", "RMomAgeU", "RHealth", "HomeOwnd", "SWghtLB", "SurpPreg", "SmokTotl", "DrnkFreq", "peri_health", "mat_health", "gov_asst") @@ -120,18 +126,18 @@ Choose from the following preliminary steps to assign to 'data' one of the follo - a mids object (output from mice::mice()) of data imputed in wide format - a list of data imputed in wide format -Data columns shoudl be either numeric or factor form. Note that at present, the code can only take factors with 2 levels. +Data columns should be either numeric or factor form. ### P1. Format Data #### P1a. Format single data frame of long data ```{r} # if you have long data with wrong time, id, and/or missing indicators and need to format -data_long_f <- formatLongData(home_dir, data_long, exposure, exposure_time_pts, outcome, tv_confounders, +data_long_f <- formatLongData(home_dir, data_long, exposure, exposure_time_pts, outcome, time_var = "TAge", #list original time variable here if it's not "WAVE" id_var = "S_ID", #list original id variable here if it's not "ID" missing = NA, #list missing value here - factor_confounders = c("state","TcBlac2", "PmBlac2", "RHasSO"), + factor_confounders = c("state","TcBlac2", "PmBlac2", "RHasSO", "PmMrSt2", "HomeOwnd"), save.out = TRUE) #list factor variables here @@ -156,6 +162,7 @@ data_wide <- data_wide[,colSums(is.na(data_wide)) < nrow(data_wide)] #removing vars w/no variability; specific to FLP data +library(dplyr) data_wide <- data_wide %>% dplyr::select(-c(RHasSO.6, RHasSO.15 , RHasSO.24)) @@ -189,12 +196,18 @@ imputed_data <- readRDS(paste0(home_dir, "/imputed_data.rds")) #place your .rds data <- imputed_data + #optional: extract first imputed dataset for testing library(mice) data <- mice::complete(imputed_data, 1) #just for testing purposes -# library(dplyr) -# data <- data %>% -# dplyr::select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood", "pcx")))) #removing these --came from old dataset + +#IS testing multi-level factors +data$"PmMrSt2" = as.factor(data$"PmMrSt2") +data$HomeOwnd = as.factor(data$HomeOwnd) +library(dplyr) +data <- data %>% + dplyr::select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood", "pcx")))) #removing these --came from old dataset +data$ID = as.numeric(as.character(unlist(data$ID))) ``` @@ -258,7 +271,7 @@ comparison <- c("h-h-h", "h-h-l") #multiple ```{r} #all inputs -inspectData(data, home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, #required input +inspectData(data, home_dir, exposure, exposure_time_pts, outcome, ti_confounders, #required input epochs, hi_lo_cut, reference, comparison, verbose = TRUE, save.out = TRUE) #optional input @@ -268,7 +281,8 @@ inspectData(data, home_dir, exposure, exposure_time_pts, outcome, tv_confounders #or minimum inputs -inspectData(data, home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders) +inspectData(data, home_dir, exposure, exposure_time_pts, outcome, + ti_confounders, tv_confounders = tv_confounders) ``` @@ -298,11 +312,12 @@ custom <- full_formulas type = "full" #all inputs -full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, +full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders, concur_conf = concur_conf, keep_conf = NULL, custom = custom, verbose = TRUE, save.out = TRUE) #or minimum inputs -full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type) +full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders) + ``` @@ -322,11 +337,11 @@ type <- "prebalance" formulas <- full_formulas #all inputs -prebalance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, +prebalance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, balance_thresh = balance_thresh, imp_conf = imp_conf, verbose = TRUE, save.out = TRUE) #or minimum inputs -prebalance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas) +prebalance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas) ``` @@ -356,11 +371,11 @@ custom <- NULL type <- "short" #all inputs -short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, +short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders, concur_conf = concur_conf, keep_conf = keep_conf, custom = custom, verbose = TRUE, save.out = TRUE) #or minimum inputs -short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type) +short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders) ``` @@ -427,31 +442,31 @@ weights <- weights.ps #all inputs -balance_stats.ps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.ps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) #minimum inputs -balance_stats.ps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights) +balance_stats.ps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights) weights <- weights.cbps -balance_stats.cbps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.cbps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) weights <- weights.glm -balance_stats.glm <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.glm <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) weights <- weights.gbm -balance_stats.gbm <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.gbm <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) weights <- weights.bart -balance_stats.bart <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.bart <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) weights <- weights.super -balance_stats.super <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.super <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) ``` @@ -477,11 +492,11 @@ formulas <- full_formulas weights <- weights.ps #all inputs -balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) #minimum inputs -balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights) +balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights) ``` @@ -508,11 +523,12 @@ type <- "update" bal_stats <- balance_stats #all inputs -updated_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, bal_stats, +updated_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders, concur_conf, keep_conf, verbose = TRUE, save.out = TRUE) #minimum inputs -updated_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, bal_stats) +updated_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders, + bal_stats = bal_stats) ``` @@ -586,11 +602,11 @@ weights <- trim_weights #all input -final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) #minimum input -final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights) +final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights) ``` #### Sensitvity analyses @@ -598,11 +614,11 @@ final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts #sensitivity tests weights <- trim_weights.s1 -final_balance_stats.s1 <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +final_balance_stats.s1 <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) weights <- trim_weights.s2 -final_balance_stats.s2 <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +final_balance_stats.s2 <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) ``` @@ -644,7 +660,8 @@ models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome #minimum inputs model <- "m0" -models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, model) +models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, model, + epochs = epochs) #some optional inputs models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, model, @@ -707,15 +724,15 @@ model <- models #output from fitModel #all inputs -results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, +results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, model, epochs, hi_lo_cut, reference, comparison, mc_comp_method , dose_level, exp_lab, out_lab, colors, verbose = FALSE, save.out = TRUE) #minimum inputs reference <- "l-l-l-l-l" comparison <- c("h-h-h-h-h", "l-h-h-h-h") #multiple -results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, - reference = reference, comparison = comparison) # for our data, specify ref and comp bc too many comparisons for exposure time point; +results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, model, epochs = epochs) + # reference = reference, comparison = comparison) # for our data, specify ref and comp bc too many comparisons for exposure time point; ``` ##### Sensitvity analyses @@ -723,11 +740,11 @@ results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_c #sensitivity tests model <- models.s1 -results.s1 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, +results.s1 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, model, epochs, hi_lo_cut, reference, comparison, mc_comp_method , dose_level, exp_lab, out_lab, colors, verbose = TRUE, save.out = TRUE) model <- models.s2 -results.s2 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, +results.s2 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, model, epochs, hi_lo_cut, reference, comparison, mc_comp_method , dose_level, exp_lab, out_lab, colors, verbose = TRUE, save.out = TRUE) ``` diff --git a/man/assessBalance.Rd b/man/assessBalance.Rd index 270012d0..d7f22733 100644 --- a/man/assessBalance.Rd +++ b/man/assessBalance.Rd @@ -10,7 +10,6 @@ assessBalance( exposure, exposure_time_pts, outcome, - tv_confounders, type, formulas, weights = NULL, @@ -34,9 +33,6 @@ created/assessed that correspond to time points when exposure wass measured} \item{outcome}{name of outcome variable with ".timepoint" suffix} -\item{tv_confounders}{list of time-varying confounders with ".timepoint" -suffix} - \item{type}{type of balance assessment; 'prebalance' or 'weighted'} \item{formulas}{list of balancing formulas at each time point output from @@ -93,7 +89,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "prebalance", formulas = f, save.out = FALSE) @@ -101,7 +96,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "prebalance", formulas = f, balance_thresh = 0.2, @@ -110,7 +104,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "prebalance", formulas = f, balance_thresh = c(0.1, 0.2), @@ -128,7 +121,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f, @@ -137,7 +129,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f, @@ -147,7 +138,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f, diff --git a/man/compareHistories.Rd b/man/compareHistories.Rd index 4b3c7325..bca151c9 100644 --- a/man/compareHistories.Rd +++ b/man/compareHistories.Rd @@ -9,7 +9,6 @@ compareHistories( exposure, exposure_time_pts, outcome, - tv_confounders, model, epochs = NULL, hi_lo_cut = NULL, @@ -34,9 +33,6 @@ created/assessed that correspond to time points when exposure wass measured} \item{outcome}{name of outcome variable with ".timepoint" suffix} -\item{tv_confounders}{list of time-varying confounders with ".timepoint" -suffix} - \item{model}{list of model outputs from fitModel()} \item{epochs}{(optional) data frame of exposure epoch labels and values} @@ -121,13 +117,11 @@ m <- fitModel(data = test, r <- compareHistories(exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), model = m, save.out = FALSE) r <- compareHistories(exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), model = m, reference = "l-l-l", comparison = "h-h-h", @@ -135,7 +129,6 @@ r <- compareHistories(exposure = "A", r <- compareHistories(exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), model = m, reference = "l-l-l", comparison = c("h-h-h", "h-l-l"), diff --git a/man/createFormulas.Rd b/man/createFormulas.Rd index 113670cf..b9ffdf16 100644 --- a/man/createFormulas.Rd +++ b/man/createFormulas.Rd @@ -9,9 +9,9 @@ createFormulas( exposure, exposure_time_pts, outcome, - tv_confounders, - ti_confounders, type, + ti_confounders, + tv_confounders = NULL, bal_stats = NULL, concur_conf = NULL, keep_conf = NULL, @@ -30,16 +30,16 @@ created/assessed that correspond to time points when exposure wass measured} \item{outcome}{name of outcome variable with ".timepoint" suffix} -\item{tv_confounders}{list of time-varying confounders with ".timepoint" -suffix} - -\item{ti_confounders}{list of time invariant confounders} - \item{type}{type of formula to create from 'full' (includes all lagged time-varying confounders), 'short' (includes time-varying confounders at t-1 lag only), or 'update' (adds to 'short' formulas any imbalanced time-varying confounders at lags great than t-1)} +\item{ti_confounders}{(optional) list of time invariant confounders} + +\item{tv_confounders}{(optional) list of time-varying confounders with ".timepoint" +suffix} + \item{bal_stats}{list of balance statistics from assessBalance(), required for 'update' type} @@ -107,7 +107,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f, diff --git a/man/eval_hist.Rd b/man/eval_hist.Rd index b96aa825..8169a509 100644 --- a/man/eval_hist.Rd +++ b/man/eval_hist.Rd @@ -7,7 +7,6 @@ eval_hist( data, exposure, - tv_confounders, epochs = NULL, time_pts, hi_lo_cut = NULL, @@ -22,9 +21,6 @@ data frames, or mids object} \item{exposure}{name of exposure variable} -\item{tv_confounders}{list of time-varying confounders with ".timepoint" -suffix} - \item{epochs}{(optional) data frame of exposure epoch labels and values} \item{time_pts}{list of integers at which weights will be diff --git a/man/make_love_plot.Rd b/man/make_love_plot.Rd index 0e648a1c..7f23671b 100644 --- a/man/make_love_plot.Rd +++ b/man/make_love_plot.Rd @@ -22,7 +22,7 @@ make_love_plot( ) } \arguments{ -\item{home_dir}{path to home directory} +\item{home_dir}{path to home directory (required if save.out = TRUE)} \item{folder}{folder path for saving} @@ -91,7 +91,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f, diff --git a/man/trimWeights.Rd b/man/trimWeights.Rd index a3ff8fce..637a9420 100644 --- a/man/trimWeights.Rd +++ b/man/trimWeights.Rd @@ -15,7 +15,7 @@ trimWeights( ) } \arguments{ -\item{home_dir}{path to home directory} +\item{home_dir}{path to home directory (required if save.out = TRUE)} \item{exposure}{name of exposure variable}