diff --git a/NAMESPACE b/NAMESPACE
index 732c4197..c52f10b8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,5 @@
 # Generated by roxygen2: do not edit by hand
-export(assessBalance)
 export(calcBalStats)
 export(createWeights)
 export(fitModel)
@@ -9,6 +8,7 @@ export(imputeData)
 importFrom(WeightIt,weightitMSM)
 importFrom(cobalt,bal.tab)
 importFrom(doRNG,"%dorng%")
+importFrom(dplyr,"%>%")
 importFrom(dplyr,arrange)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,filter)
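Note: the importFrom(dplyr,"%>%") entry above is generated by roxygen2 rather than edited by hand. A minimal sketch of the tag that produces it (it can live in any of the package's .R files; the placement shown is an assumption, not this package's actual layout):

#' @importFrom dplyr %>%
NULL

Re-running devtools::document() then regenerates the NAMESPACE entry shown in this hunk.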
diff --git a/R/assessBalance.R b/R/assessBalance.R
index 394ed27c..9c0a7294 100644
--- a/R/assessBalance.R
+++ b/R/assessBalance.R
@@ -1,15 +1,5 @@
 #' Assess success of covariate balancing
-#' Assesses how well balance was achieved for each of the covariates/potential confounds in relation to each of the exposures,
-#' and returns a list of unbalanced covariates for each exposure to add to future models
 #'
-#' @param object msm object that contains all relevant user inputs
-#' @param forms formula for assessing balance
-#' @param data_for_model_with_weights imputed data with weights
-#' @param histories optional binary indicator of whether to print histories for bal stats
-#' @return list of unbalanced_covariates_for_models for each exposure
-#' @seealso [msmObject()] for more on the weights_models param
-#' @seealso [createWeights()] for more on the weights_models param
-#' @export
 #' @importFrom knitr kable
 #' @importFrom ggplot2 ggplot
 #' @importFrom ggplot2 theme
@@ -26,9 +16,9 @@
 #' @importFrom ggplot2 guide_axis
 #' @importFrom ggplot2 ggsave
 #' @importFrom dplyr bind_rows
-#' @examples assessBalance(object, forms, data_for_model_with_weights, histories=1)
+#' @importFrom dplyr %>%

-assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights = NULL, balance_thresh = 0.1, imp_conf = NULL, user.o = T){
+assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights = NULL, balance_thresh = 0.1, imp_conf = NULL, user.o = TRUE){

   # Error checking
   if (!dir.exists(home_dir)) {
@@ -50,6 +40,9 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
     stop("If you provide a list of important confounders, please provide a list of two balance thresholds for important and less important confounders, respectively")
   }

+  folder <- ifelse(type == "prebalance", "prebalance/", "weighted/")
+
   #creating required directories
   balance_dir <- file.path(home_dir, "balance")
   if (!dir.exists(balance_dir)) {
@@ -64,9 +57,7 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
     dir.create(plots_dir)
   }

-  exposure_time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2))
-
-  # exposure_type <- ifelse(class(data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")
+  # exposure_time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2))

   weights_method <- ifelse(type == "prebalance", "no weights", weights[[1]]$method)

@@ -78,7 +69,7 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
       stop("Please provide a valid directory path with imputed .csv's, a data frame, or a 'mids' object for the 'data' field.")
     }
     if (length(dir(data)) < 2) {
-      stop("If you specify data as a directory, please supply more than 1 imputed dataset.")
+      stop("If you specify data as a directory, please supply more than 1 imputed dataset in addition to the original data.")
     }

     # List imputed files
@@ -91,6 +82,7 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
       if (sum(duplicated(imp_data$"ID")) > 0){
         stop("Please provide a directory to data that are imputed in wide format with a single row per ID.")
       }
+      imp_data
     })
   }
@@ -112,14 +104,14 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
     bal_stats <- lapply(1:m, function(k) {
       d <- as.data.frame(mice::complete(data, k))
-
-      exposure_type <- ifelse(class(d[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")
+      exposure_type <- ifelse(class(d[, paste0(exposure, '.',
+                                               exposure_time_pts[1])]) == "numeric", "continuous", "binary")

       if (sum(duplicated(d$"ID")) > 0){
         stop("Please provide wide imputed datasets with a single row per ID.")
       }

-      calcBalStats(d, formulas, exposure, outcome, balance_thresh, k = k, weights = NULL, imp_conf, user.o)
+      calcBalStats(d, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = k, weights = NULL, imp_conf, user.o)
     })
   }
@@ -129,13 +121,14 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
     bal_stats <- lapply(1:m, function(k) {
       d <- data[[k]]

-      exposure_type <- ifelse(class(d[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")
+      exposure_type <- ifelse(class(d[, paste0(exposure, '.',
+                                               exposure_time_pts[1])]) == "numeric", "continuous", "binary")

       if (sum(duplicated(d$"ID")) > 0){
         stop("Please provide wide imputed datasets with a single row per ID.")
       }

-      calcBalStats(d, formulas, exposure, outcome, balance_thresh, k = k, weights = NULL, imp_conf, user.o)
+      calcBalStats(d, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = k, weights = NULL, imp_conf, user.o)
     })
   }
@@ -143,7 +136,8 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
   # Save out balance stats for each imputed dataset
   bal_stats_all_imp <- do.call(bind_rows, bal_stats)
   bal_stats_all_imp <- bal_stats_all_imp[order(bal_stats_all_imp$covariate), ]
-  write.csv(bal_stats_all_imp, paste0(home_dir, "/balance/", type, "/", exposure, "_all_imps_balance_stat_summary.csv"), row.names = FALSE)
+  write.csv(bal_stats_all_imp, paste0(home_dir, "/balance/", type, "/",
+                                      exposure, "_all_imps_balance_stat_summary.csv"), row.names = FALSE)

   if (user.o == TRUE){
     message("Pre-balance statistics for each imputed dataset have now been saved in the 'balance/prebalance/' folder", "\n")
@@ -162,10 +156,12 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
   if (!is.null(imp_conf)){
     all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]))
-    all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) )
+    all_bal_stats <- all_bal_stats %>% dplyr::mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) <
+                                                                         all_bal_stats$bal_thresh, 1, 0))
   } else{
     all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh)
-    all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) )
+    all_bal_stats <- all_bal_stats %>% dplyr::mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) <
+                                                                         all_bal_stats$bal_thresh, 1, 0))
   }

   cat("\n")
@@ -184,8 +180,7 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
   }

   exposure_type <- ifelse(class(data[, paste0(exposure, '.',
                                               exposure_time_pts[1])]) == "numeric", "continuous", "binary")
-
-  bal_stats <- calcBalStats(data, formulas, exposure, outcome, balance_thresh, k = 0, weights = NULL, imp_conf, user.o)
+  bal_stats <- calcBalStats(data, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = 0, weights = NULL, imp_conf, user.o)

   # Gathering imbalanced covariate statistics for the final list/assessment of imbalanced covariates
   all_bal_stats <- data.frame(
@@ -215,8 +210,9 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
     bal_stats <- lapply(1:m, function(k) {
       d <- as.data.frame(mice::complete(data, k))
-
-      exposure_type <- ifelse(class(d[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")
+      # exposure_type <- ifelse(class(d[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")
+      assign("exposure_type", ifelse(class(d[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary"),
+             envir = .GlobalEnv)

       if (sum(duplicated(d$"ID")) > 0){
         stop("Please provide wide imputed datasets with a single row per ID.")
@@ -224,16 +220,18 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty

       w <- weights[[k]]

-      calcBalStats(d, formulas, exposure, outcome, balance_thresh, k = k, weights = w, imp_conf, user.o) #are weights just in data at this point?
+      calcBalStats(d, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = k, weights = w, imp_conf, user.o) #are weights just in data at this point?
     })
   }

+
   if (class(data) == "list"){
     m = length(data)
     bal_stats <- lapply(1:m, function(k) {
       d <- data[[k]]

-      exposure_type <- ifelse(class(d[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")
+      assign("exposure_type", ifelse(class(d[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary"),
+             envir = .GlobalEnv)

       if (sum(duplicated(d$"ID")) > 0){
         stop("Please provide wide imputed datasets with a single row per ID.")
@@ -241,14 +239,16 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty

       w <- weights[[k]]

-      calcBalStats(d, formulas, exposure, outcome, balance_thresh, k = k, weights = w, imp_conf, user.o)
+      calcBalStats(d, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = k, weights = w, imp_conf, user.o)
     })
   }

+
   # Save out balance stats for each imputed dataset
   bal_stats_all_imp <- do.call(bind_rows, bal_stats)
   bal_stats_all_imp <- bal_stats_all_imp[order(bal_stats_all_imp$covariate), ]
-  write.csv(bal_stats_all_imp, paste0(home_dir, "/balance/", type, "/", exposure, "_all_imps_balance_stat_summary.csv"), row.names = FALSE)
+  write.csv(bal_stats_all_imp, paste0(home_dir, "/balance/", type, "/", exposure,
+                                      "_all_imps_balance_stat_summary.csv"), row.names = FALSE)

   if (user.o == TRUE){
     message("Weighted balance statistics for each imputed dataset have now been saved in the 'balance/weighted/' folder", "\n")
@@ -267,10 +267,12 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
   if (!is.null(imp_conf)){
     all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]))
-    all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) )
+    all_bal_stats <- all_bal_stats %>% dplyr::mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) <
+                                                                         all_bal_stats$bal_thresh, 1, 0))
   } else{
     all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh)
-    all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) )
+    all_bal_stats <- all_bal_stats %>% dplyr::mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) <
+                                                                         all_bal_stats$bal_thresh, 1, 0))
   }

   cat("\n")
@@ -289,10 +291,10 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
   }

   w <- weights[[1]]
+  bal_stats <- calcBalStats(data, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = 0, weights = w, imp_conf, user.o)

-  bal_stats <- calcBalStats(data, formulas, exposure, outcome, balance_thresh, k = 0, weights = w, user.o)
-
-  exposure_type <- ifelse(class(data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")
+  assign("exposure_type", ifelse(class(data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary"),
+         envir = .GlobalEnv)

   # Gathering imbalanced covariate statistics for the final list/assessment of imbalanced covariates
   all_bal_stats <- data.frame(
@@ -304,7 +306,6 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
     bal_thresh = bal_stats$bal_thresh,
     balanced = bal_stats$balanced)

-
   # Getting totals
   tot_covars <- sapply(strsplit(all_bal_stats$covariate, "\\."), `[`, 1)
 }
@@ -313,40 +314,36 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
   ### Plotting and summarizing
   tot_cons <- tot_covars[!duplicated(tot_covars)] # Total domains/constructs

-  # x_lab <- ifelse(exposure_type == "continuous", "Exposure-Covariate Correlation", "Standardized Mean Difference")
-
   # Make love plot to summarize imbalance at each exposure time point
   data_type <- ifelse(class(data) == "mids" | class(data) == "list", "imputed", "single")
-  folder <- ifelse(type == "prebalance", "prebalance/", "weighted/")
-  weights_method = ifelse(type == "weighted", w$method, "no weights")
+  weights_method = ifelse(type == "weighted", weights[[1]]$method, "no weights")

   sapply(seq_along(exposure_time_pts), function(i) {
     exposure_time_pt <- exposure_time_pts[i]
     temp <- all_bal_stats %>% filter(exp_time == exposure_time_pt)
-    # make_love_plot(home_dir, folder, exposure, exposure_type, temp, data_type, balance_thresh, imp_conf)
-    make_love_plot(home_dir, folder, exposure, exposure_time_pt, exposure_type, k, form_name, temp, data_type, balance_thresh, weights_method, imp_conf)
-
+    make_love_plot(home_dir, folder, exposure, exposure_time_pt, exposure_type, k = 0, form_name, temp,
+                   data_type, balance_thresh, weights_method, imp_conf)
   })

-
   if (user.o == TRUE){
     if (class(data) == "mids" | class(data) == "list"){
       cat(paste0("USER ALERT: Summary plots for ", form_name, " ", exposure,
-                 " averaged across all imputations have been saved out for each time point in the 'balance/", type, "/plots/' folder."), "\n")
+                 " averaged across all imputations have been saved out for each time point in the 'balance/",
+                 type, "/plots/' folder."), "\n")
     } else{
-
-      message(paste0("USER ALERT: Summary plots for ", form_name, " ", exposure, " have now been saved out for each time point in the 'balance/", type, "/plots/' folder."), "\n")
+      message(paste0("USER ALERT: Summary plots for ", form_name, " ", exposure,
+                     " have now been saved out for each time point in the 'balance/", type, "/plots/' folder."), "\n")
     }
   }

   # Save out all correlations/std mean differences
   sink(paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_all_", type, "_",
weights_method, "_associations.html")) - stargazer::stargazer(all_bal_stats, type = "html", digits = 2, column.labels = colnames(unbalanced_covars), summary = FALSE, + stargazer::stargazer(all_bal_stats, type = "html", digits = 2, column.labels = colnames(all_bal_stats), summary = FALSE, rownames = FALSE, header = FALSE, out = paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_all_", type,"_", weights_method, "_assocations.html")) sink() @@ -362,13 +359,6 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty } - # Save out all correlations/std mean differences - sink(paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_all_", type, "_", weights_method, "_associations.html")) - stargazer(all_bal_stats, type = "html", digits = 2, column.labels = colnames(unbalanced_covars), summary = FALSE, - rownames = FALSE, header = FALSE, out = paste0(home_dir, "/balance/", type, "/", exposure, "-", - outcome, "_all_", type,"_", weights_method, "_assocations.html")) - sink() - # Saving out all pre-balance associations write.csv(all_bal_stats, paste0(home_dir, "/balance/", type, "/", exposure, "_", type, "_", weights_method, "_stat_summary.csv"), row.names = FALSE) @@ -378,13 +368,16 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty cat("\n") } - # Finding all imbalanced variables\ + # Finding all imbalanced variables unbalanced_covars <- all_bal_stats %>% filter(balanced == 0) + unbalanced_constructs <- sapply(strsplit(unbalanced_covars$covariate, "\\."), + "[",1)[!duplicated(sapply(strsplit(unbalanced_covars$covariate, "\\."), "[",1))] if (class(data) == "mids" | class(data) == "list"){ - cat(paste0("USER ALERT: Averaging across all imputed datasets for exposure ", exposure, " using the ", form_name, " formulas and ", weights_method, " :"), "\n") + cat(paste0("USER ALERT: Averaging across all imputed datasets for exposure ", exposure, " using the ", + form_name, " formulas and ", weights_method, " :"), "\n") cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ", round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n") @@ -413,9 +406,9 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty } cat("\n") - cat("\n") cat(knitr::kable(unbalanced_covars, caption = "Imbalanced Covariates", format = 'pipe'), sep = "\n") + cat("\n") # Save out only imbalanced covariates sink(paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_",type,"_", weights_method, "_all_covariates_imbalanced.html")) @@ -424,7 +417,6 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty sink() all_bal_stats - } diff --git a/R/calcBalStats.R b/R/calcBalStats.R index e504d719..9b38ea8b 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -1,13 +1,5 @@ #' Code to calculate balance stats based on Jackson paper (either weighted or unweighted) #' -#' @param object msm object that contains all relevant user inputs -#' @param imp_wide_data wide data -#' @param forms from createForms(), createShortForms, or updateForms -#' @param f_type form type label -#' @param exposure exposure -#' @param outcome outcome -#' @param weighted binary indicator of whether to calculate weighted balance stats -#' @param histories binary indicator of whether to cat history sample distributions #' @importFrom ggplot2 ggplot #' @importFrom ggplot2 theme #' @importFrom ggplot2 geom_text 
diff --git a/R/calcBalStats.R b/R/calcBalStats.R
index e504d719..9b38ea8b 100644
--- a/R/calcBalStats.R
+++ b/R/calcBalStats.R
@@ -1,13 +1,5 @@
 #' Code to calculate balance stats based on Jackson paper (either weighted or unweighted)
 #'
-#' @param object msm object that contains all relevant user inputs
-#' @param imp_wide_data wide data
-#' @param forms from createForms(), createShortForms, or updateForms
-#' @param f_type form type label
-#' @param exposure exposure
-#' @param outcome outcome
-#' @param weighted binary indicator of whether to calculate weighted balance stats
-#' @param histories binary indicator of whether to cat history sample distributions
 #' @importFrom ggplot2 ggplot
 #' @importFrom ggplot2 theme
 #' @importFrom ggplot2 geom_text
@@ -25,24 +17,23 @@
 #' @importFrom ggplot2 ggsave
 #' @importFrom stargazer stargazer
 #' @export
-#' @examples calcBalStats(object, imp_wide_data, forms, f_type, exposure, outcome, k=1, weighted=0, histories=1)
 #'
-calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1, k = 0, weights = NULL, imp_conf = NULL, user.o){
+calcBalStats <- function(data, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = 0, weights = NULL, imp_conf = NULL, user.o){
   library(cobalt)
   set.seed(1234)

-  exposure_time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2))
+  # exposure_time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2))
   form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[",1)
   exposure_type <- ifelse(class(data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")
   weighted = ifelse(!is.null(weights), 1, 0)
   factor_covariates <- colnames(data)[which(sapply(data, class) == "factor")]
-  factor_covariates <- factor_covariates[!factor_covariates %in% ID]
+  factor_covariates <- factor_covariates[!factor_covariates %in% "ID"]

   if (weighted == 1){
     weights_method = weights$method
-    w <- weights$weights
-    data <- data %>% dplyr::mutate(weights = as.numeric(w))
+    w <- weights$weights #change this to reflect each time point
+    data <- data %>% dplyr::mutate(weights = as.numeric(w)) #adds iptw-specific weights
   }else{
     weights_method <- "no weights"
   }
@@ -50,17 +41,21 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1
   folder <- ifelse(weighted == 0, "prebalance/", "weighted/")
   data_type <- ifelse(k == 0, "single", "imputed")

-  if (data_type == "imputed"){
+  if (data_type == "imputed" & user.o == TRUE){
     cat(paste0("**Imputation ", k, "**"), "\n")
   }

   #creating initial data frames
   #data frame with all sampling weights for all exposures at all exposure time points for all histories
-  all_prop_weights <- data.frame("ID" = NA,exposure = NA, exp_time = NA, history = NA)
+  all_prop_weights <- data.frame("ID" = NA,
+                                 exposure = NA,
+                                 exp_time = NA,
+                                 history = NA)
   all_bal_stats <- data.frame()

-  for (z in 1:length(exposure_time_pts)){ #cycles through exposure time points
+  #cycles through exposure time points
+  for (z in 1:length(exposure_time_pts)){
     exposure_time_pt <- exposure_time_pts[z]
     lagged_time_pts <- exposure_time_pts[exposure_time_pts < exposure_time_pt]
-        data$flag <- ifelse(data[, exps_time[t]] > median(data[, paste0(exposure, ".", exposure_time_pt)]) & data$flag == flag, t, NA) # finding those w/ vals > median exp @ time pt & flagged at prev t's
+        data$flag <- ifelse(data[, exps_time[t]] > median(data[, paste0(exposure, ".", exposure_time_pt)])
+                            & data$flag == flag, t, NA) # finding those w/ vals > median exp @ time pt & flagged at prev t's
       } else { # binary exp
         data$flag <- ifelse(data[, exps_time[t]] == 1 & data$flag == flag, t, NA) # if exposure is present & flagged at prev t's
       }
@@ -158,12 +158,14 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1
     } # ends history loop (e.g., "l-l-l")

     #summarize contributors to each history
-    prop_sum <- prop_weights %>% dplyr::group_by(as.factor(history)) %>% dplyr::summarize(n = dplyr::n())
+    prop_sum <- prop_weights %>%
+      dplyr::group_by(as.factor(history)) %>%
+      dplyr::summarize(n = dplyr::n())

     # GET BALANCE STATISTICS FOR T>1 (when there is a history to weight on)
     if (length(lagged_time_pts) > 0) {
       # Merge data with prop_weights by ID
-      temp <- merge(data, prop_weights, by = ID, all.x = TRUE)
+      temp <- merge(data, prop_weights, by = "ID", all.x = TRUE)

       # Removing any histories that only have 1 or 0 person contributing (cannot calc bal stats)
       if (sum(prop_sum$n == 1) > 0 | sum(prop_sum$n == 0) > 0) {
@@ -186,7 +188,9 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1
       if (weighted == 0) { # no IPTW weighting
         if (exposure_type == "continuous") {
           bal_stats <- sapply(sort(unique(temp$history)), function(i) { # finding balance by history
-            temp2 <- temp %>% dplyr::filter(history == i)
+            temp2 <- temp %>%
+              dplyr::filter(history == i)
+
             cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance
                               subset = temp2$history[temp2$history == i] == i) # subsetting by that history
           })
@@ -196,17 +200,20 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1
           })
           bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats))
           # standardizing balance statistics after weighting by history
-          bal_stats <- bal_stats %>% dplyr::mutate(std_bal_stats = weighted_bal_stats /
-                                                     (sapply(seq(ncol(data[, covars])), function(x) {
+          bal_stats <- bal_stats %>%
+            dplyr::mutate(std_bal_stats = weighted_bal_stats / (sapply(seq(ncol(data[, covars])), function(x) {
               sd(as.numeric(data[, covars][, x]), na.rm = TRUE) # unweighted covar sd
             }) * sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) # exposure SD at that time pt
-          bal_stats <- bal_stats %>% dplyr::select(contains(c("std")))
+          bal_stats <- bal_stats %>%
+            dplyr::select(contains(c("std")))
         } #ends continuous

         if (exposure_type == "binary") {
           bal_stats <- sapply(sort(unique(temp$history)), function(i) {
-            temp2 <- temp %>% dplyr::filter(history == i)
+            temp2 <- temp %>%
+              dplyr::filter(history == i)
+
             cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference
                               subset = temp2$history[temp2$history == i] == i) # subsetting by that history
           })
@@ -217,21 +224,26 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1
           bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats))

           # standardizing balance statistics after finding weighted balance stats
-          bal_stats <- bal_stats%>%dplyr::mutate(std_bal_stats = weighted_bal_stats/
-                                                   sapply(seq(ncol(data[,covars])), function(x){
+          bal_stats <- bal_stats %>%
+            dplyr::mutate(std_bal_stats = weighted_bal_stats / sapply(seq(ncol(data[,covars])), function(x){
               sqrt(mean( #dividing by pool SD estimate (unadjusted)
                 var(as.numeric(data[paste0(exposure, ".", exposure_time_pts[1]) == 1, covars[x]])), #treated var
                 var(as.numeric(data[paste0(exposure, ".", exposure_time_pts[1]) == 0, covars[x]]))))})) #untreated var
-          bal_stats <- bal_stats %>% dplyr::select(contains(c("std")))
+          bal_stats <- bal_stats %>%
+            dplyr::select(contains(c("std")))
         } #ends binary
       } #ends weighted=0

+
       # Weighted balance checking
       if (weighted == 1) { # if weighted, use IPTW weights from weightitmsm
+
         if (exposure_type == "continuous") {
           # finds balance for each covariate clustered/subset by history
           bal_stats <- sapply(sort(unique(temp$history)), function(i) {
-            temp2 <- temp %>% dplyr::filter(history == i)
+            temp2 <- temp %>%
+              dplyr::filter(history == i)
+
             cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance
                               subset = temp2$history[temp2$history == i] == i, # subsetting by that history
                               weights = temp2[, "weights"]) # adding IPTW weights
@@ -242,7 +254,8 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1
           })
           bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats))
           # standardizing balance statistics after weighting by history
-          bal_stats <- bal_stats %>% dplyr::mutate(std_bal_stats = weighted_bal_stats /
+          bal_stats <- bal_stats %>%
+            dplyr::mutate(std_bal_stats = weighted_bal_stats /
                             (sapply(seq(ncol(data[, covars])), function(x) {
                               sd(as.numeric(data[, covars][, x]), na.rm = TRUE) # unweighted covar sd
                             }) * sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE)))
@@ -250,14 +263,17 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1

           # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error
           bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0
-          bal_stats <- bal_stats %>% dplyr::select(contains(c("std")))
+          bal_stats <- bal_stats %>%
+            dplyr::select(contains(c("std")))
         } #ends continuous

         if (exposure_type == "binary") {
           # finds balance for each covariate clustered/subset by history
           bal_stats <- sapply(sort(unique(temp$history)), function(i) {
-            temp2 <- temp %>% dplyr::filter(history == i)
+            temp2 <- temp %>%
+              dplyr::filter(history == i)
+
             cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference
                               subset = temp2$history[temp2$history == i] == i, # subsetting by that history
                               weights = temp2[,"weights"]) # adding IPTW weights
@@ -269,15 +285,18 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1
           bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats))

           # standardizing balance statistics after finding weighted balance stats
-          bal_stats <- bal_stats %>% dplyr::mutate(std_bal_stats=weighted_bal_stats/
-                                                     sapply(seq(ncol(data[,covars])), function(x){
+          bal_stats <- bal_stats %>%
+            dplyr::mutate(std_bal_stats = weighted_bal_stats / sapply(seq(ncol(data[,covars])), function(x){
               sqrt(mean( #dividing by pool SD estimate (unadjusted)
-                var(as.numeric(data[paste0(exposure, ".", exposure_time_pts[1]) == 1, covars[x]])), #treated var
-                var(as.numeric(data[paste0(exposure, ".", exposure_time_pts[1]) == 0, covars[x]]))))})) #untreated var
+                var(as.numeric(data[paste0(exposure, ".",
+                                           exposure_time_pts[1]) == 1, covars[x]])), #treated var
+                var(as.numeric(data[paste0(exposure, ".",
+                                           exposure_time_pts[1]) == 0, covars[x]]))))})) #untreated var

           # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error
           bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0
-          bal_stats <- bal_stats %>% dplyr::select(contains(c("std")))
+          bal_stats <- bal_stats %>%
+            dplyr::select(contains(c("std")))
         } #ends binary
       } #ends weighted
@@ -297,11 +316,15 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1

       #adds custom bal thresh info
       if (!is.null(imp_conf)){
-        bal_stats <- bal_stats %>% dplyr::mutate(bal_thresh = ifelse(bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]))
-        bal_stats <- bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) )
+        bal_stats <- bal_stats %>%
+          dplyr::mutate(bal_thresh = ifelse(bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]))
+        bal_stats <- bal_stats %>%
+          dplyr::mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0))
       } else{
-        bal_stats <- bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh)
-        bal_stats <- bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) )
+        bal_stats <- bal_stats %>%
+          dplyr::mutate(bal_thresh = balance_thresh)
+        bal_stats <- bal_stats %>%
+          dplyr::mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0))
       }

       bal_stats <- bal_stats %>%
@@ -332,21 +355,24 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1
               imbalanced_n = sum(balanced == 0), # Tallying imbalanced covariates
               n = dplyr::n())

-  write.csv(bal_summary_exp, paste0(home_dir, "/balance/", folder, form_name, "_", exposure, "_", k, "_", weights_method, "_balance_stat_summary.csv"))
+  write.csv(bal_summary_exp, paste0(home_dir, "/balance/", folder, form_name, "_", exposure, "_", k, "_",
+                                    weights_method, "_balance_stat_summary.csv"))
   cat("\n")
   cat(paste0("Balance statistics using ", form_name, " formulas for ", exposure, ", imputation ", k,
             ", weighting method ", weights_method, " have been saved in the 'balance/", folder, "' folder"), "\n")

-  write.csv(all_prop_weights, paste0(home_dir, "/balance/", folder, "/", form_name, "_form_", exposure, "_", k, "_", weights_method, "_history_sample_weight.csv"))
-  cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure, ", imputation ", k, " have been saved in the 'balance/", folder, "' folder"), "\n")
+  write.csv(all_prop_weights, paste0(home_dir, "/balance/", folder, "/", form_name, "_form_", exposure, "_", k,
+                                     "_", weights_method, "_history_sample_weight.csv"))
+  cat(paste0("Sampling weights using the ", form_name, " for ", exposure, ", imputation ", k,
+             " have been saved in the 'balance/", folder, "' folder"), "\n")
   cat("\n")

   # GETS total possible COVARIATES FROM FORM FOR ASSESSING BALANCE
   all_form <- as.data.frame(do.call(rbind, formulas))
   tot_covars <- deparse(all_form[, 3], width.cutoff = 300)
-  tot_covars=as.character(unlist(strsplit(tot_covars, "\\+")))[!grepl("form", as.character(unlist(strsplit(tot_covars, "\\+"))))]
+  tot_covars <- as.character(unlist(strsplit(tot_covars, "\\+")))[!grepl("form", as.character(unlist(strsplit(tot_covars, "\\+"))))]
   tot_covars <- gsub(" ", "", tot_covars)
   tot_covars <- na.omit(sapply(strsplit(tot_covars, "\\."), "[", 1)[!duplicated(sapply(strsplit(tot_covars, "\\."), "[", 1))])
@@ -360,7 +386,6 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1
   total_domains <- length(tot_covars)

   # remaining_avg_abs_corr <- round(mean(abs(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"]), na.rm = TRUE), 2)
-
   remaining_avg_abs_corr <- round(median(abs(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"]), na.rm = TRUE), 2)
   remaining_corr_range <- paste0(round(min(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"], na.rm = TRUE), 2), "-",
                                  round(max(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"], na.rm = TRUE), 2))
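For reviewers, a sketch of the standardization the continuous-exposure branches above implement: the history-weighted covariance is divided by the product of the unweighted covariate SD and the exposure SD at that time point. Here dat, covars, and time_pt are hypothetical stand-ins for the function's internal objects:

raw_cov <- cobalt::col_w_cov(dat[, covars],
                             dat[, paste0("A.", time_pt)], std = FALSE)
std_bal <- raw_cov /
  (sapply(covars, function(v) sd(as.numeric(dat[, v]), na.rm = TRUE)) *
     sd(dat[, paste0("A.", time_pt)], na.rm = TRUE))

This mirrors cobalt's own std = TRUE behavior but lets the code weight by history before standardizing.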
diff --git a/R/compareHelpers.R b/R/compareHelpers.R
index cc526556..c30bdf18 100644
--- a/R/compareHelpers.R
+++ b/R/compareHelpers.R
@@ -1,4 +1,4 @@
-# FUNCTIONS
+# FUNCTIONS called by compareHistories

 #custom contrasts
 get_reference_values <- function(d, reference) {
@@ -15,12 +15,12 @@ get_comparison_values <- function(d, comp_histories) {
       d[x, unlist(strsplit(comp, "-"))[x]]
     })
   })
-  return(t(comp_vals))
+  t(comp_vals)
 }

 create_custom_contrasts <- function(d, reference, comp_histories, exposure, preds) {
-  if (is.na(reference) | any(is.na(comp_histories))) {
+  if (is.na(reference) | is.null(comp_histories)) {
     return(NULL) # Invalid input, return early
   }
@@ -32,7 +32,7 @@ create_custom_contrasts <- function(d, reference, comp_histories, exposure, pred
     y |>
       marginaleffects::hypotheses(cus_comps)
   })
-  return(comps)
+  comps
 }

@@ -50,13 +52,15 @@ create_custom_comparisons <- function(preds, ref_vals, comp_vals, exposure) {
   }

   if (nrow(comp_vals) > 1) {
-    colnames(cus_comps) <- paste0("(", paste0(paste0(ref_vals, collapse = ","), ") - (", apply(comp_vals, 1, paste, collapse = ",")), ")")
+    colnames(cus_comps) <- paste0("(", paste0(paste0(ref_vals, collapse = ","), ") - (",
+                                              apply(comp_vals, 1, paste, collapse = ",")), ")")
   } else {
-    colnames(cus_comps) <- paste0("(", paste0(paste0(ref_vals, collapse = ","), ") - (", paste0(paste0(comp_vals, collapse = ",")), ")"))
+    colnames(cus_comps) <- paste0("(", paste0(paste0(ref_vals, collapse = ","), ") - (",
+                                              paste0(paste0(comp_vals, collapse = ",")), ")"))
   }
   cus_comps[is.na(cus_comps)] <- 0
-  return(cus_comps)
+  cus_comps
 }

@@ -75,73 +77,64 @@ add_histories <- function(p, d) {
     history <- unlist(history)
   }

-  if("term" %in% colnames(p)){
+  if("term" %in% colnames(p)) { #preds_pool
     history <- matrix(data = NA, nrow = nrow(p), ncol = 1)

     # Get histories from the first element
-    if(grepl("\\=", p$term[1])){
+    if(grepl("\\=", p$term[1])) {
       for (i in 1:nrow(p)){
         vals <- as.numeric(sapply(strsplit(unlist(strsplit(as.character(p$term[i]), "\\,")), "="), "[",2))
-        history[i] <- as.data.frame(paste(ifelse(round(vals,3)==round(d$l,3), "l", "h"), collapse="-"))
+        history[i] <- as.data.frame(paste(ifelse(round(vals, digits = 2) == round(d$l, digits = 2), "l", "h"), collapse = "-"))
       }
       history <- unlist(history)
-    }else{
+    } else {
       for (i in 1:nrow(p)) {
         temp <- as.character(p$term[i])
         pair <- lapply(1:2, function(y) {
           a <- sapply(strsplit(temp, " - "), "[", y)
           his <- lapply(1:nrow(d), function(z) {
-            ifelse(round(as.numeric(gsub("[^0-9.-]", "", sapply(strsplit(a, "\\,"), "[", z))), 3) == round(d[z, "l"], 3), "l", "h")
+            ifelse(round(as.numeric(gsub("[^0-9.-]", "",
+                                         sapply(strsplit(a, "\\,"), "[", z))), 2) == round(d[z, "l"], 2), "l", "h")
           })
         })
         history[i, 1] <- paste(sapply(pair, paste, collapse = "-"), collapse = " vs ")
       }
     }
-
   }
-
   p <- cbind (p, history = history)
-
   p
 }

-add_dose <- function(p, dose_level){
-  if( length(p$history[1]) == 1 ){
-    if(grepl("vs", p$history[1])){
+add_dose <- function(p, dose_level) {
+  if( length(p$history[1]) == 1 ) {
+    if(grepl("vs", p$history[1])) {
       dose_a <- stringr::str_count(sapply(strsplit(p$history, "vs"), "[", 1), dose_level)
       dose_b <- stringr::str_count(sapply(strsplit(p$history, "vs"), "[", 2), dose_level)
       dose_count <- data.frame(dose = gsub(" ", " vs ", paste(dose_a, dose_b)))
     } else{
-
       dose_count <- stringr::str_count(p$history, dose_level)
-
     }
   }
-
   if (length(p$history[1]) > 1){
     dose_count <- stringr::str_count(p$history, dose_level)
   }
-
-  # dose_count <- rep(list(dose_count), length(p))
   p <- cbind (p, dose_count = dose_count)
   p
 }

-perform_multiple_comparison_correction <- function(comps, method) {
+perform_multiple_comparison_correction <- function(comps, reference, comp_histories, method) {
   if (any(is.na(reference) & is.na(comp_histories)) | length(comp_histories) > 1) {
     cat("\n")
     cat(paste0("Conducting multiple comparison correction using the ", method, " method."), "\n")
     cat("\n")
-    corr_p <- lapply(comps, function(x) {
-      stats::p.adjust(x$p.value, method = method)
-    })
-    comps <- Map(cbind, comps, p.value_corr = corr_p)
+    corr_p <- stats::p.adjust(comps$p.value, method = method)
+    comps <- cbind(comps, p.value_corr = corr_p)
   } else {
-    cat(paste0("The user specified comparison only between ", reference, " and ", comp_histories,
-               " so no correction for multiple comparisons will be implemented."), "\n")
+    cat("\n")
between ", reference, " and ", comp_histories, - " so no correction for multiple comparisons will be implemented."), "\n") + cat("\n") + cat(paste0("The user specified comparison only between ", reference, " and a single comparison, ", comp_histories, + ", so no correction for multiple comparisons will be implemented."), "\n") } - - return(comps) + comps } diff --git a/R/compareHistories.R b/R/compareHistories.R index 549c9862..c9d154c5 100644 --- a/R/compareHistories.R +++ b/R/compareHistories.R @@ -1,8 +1,6 @@ #' Compare exposure histories #' This code uses the best-fitting model for each exposure-outcome pair to compare the effects of user-specified reference and comparison histories of exposure on outcome using linear hypothesis testing -#' @param object msm object that contains all relevant user inputs -#' @param data_for_model_with_weights_cutoff imputed datasets with truncated weights -#' @param all_models fitted models for each imputed dataset +#' @param model fitted models for each imputed dataset #' @param reference optional reference event for custom comparison #' @param compare optional comparison event(s) for custom comparison #' @importFrom gtools permutations @@ -15,11 +13,9 @@ #' @importFrom stats p.adjust #' @importFrom knitr kable #' @return preds_pool -#' @examples compareHistories(object, data_for_model_with_weights_cutoff, all_models, reference=NA, compare=NA) -compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, epochs = NULL, hi_lo_cut = NA, reference = NA, comparison = NULL, mc_comp_method = "BH", dose_level = "h", exp_lab = NA, out_lab = NA, colors = "Dark2" ) { +compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, epochs = NULL, hi_lo_cut = NA, reference = NA, comparison = NULL, mc_comp_method = "BH", dose_level = "h", exp_lab = NA, out_lab = NA, colors = "Dark2", user.o = TRUE ) { - #error checking if (!dir.exists(home_dir)) { stop("Please provide a valid home directory path.") } @@ -28,15 +24,13 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ if (!dir.exists(histories_dir)) { dir.create(histories_dir) } - plot_dir <- file.path(home_dir, "plots") if (!dir.exists(plot_dir)) { dir.create(plot_dir) } - # exposure_time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2)) - - exposure_type <- ifelse(class(model[[1]]$data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary") + exposure_type <- ifelse(class(model[[1]]$data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", + "continuous", "binary") if( is.null(epochs)){ #making epochs time pts if not specified by user epochs <- data.frame(epochs = as.character(exposure_time_pts), @@ -50,15 +44,17 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ #history inspection - #print history sample distribution --check this - eval_hist(data = model[[1]]$data, exposure, tv_confounders, epochs, - time_pts= as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2)), - hi_lo_cut, reference, comparison) + #print history sample distribution + if (user.o == TRUE){ + eval_hist(data = model[[1]]$data, exposure, tv_confounders, epochs, + exposure_time_pts, hi_lo_cut, reference, comparison) + } # gathering epoch info eps <- epochs$epochs # creates permutations of high ("h") and low ("l") levels of exposure for each exposure epoch - exposure_levels <- 
diff --git a/R/compareHistories.R b/R/compareHistories.R
index 549c9862..c9d154c5 100644
--- a/R/compareHistories.R
+++ b/R/compareHistories.R
@@ -1,8 +1,6 @@
 #' Compare exposure histories
 #' This code uses the best-fitting model for each exposure-outcome pair to compare the effects of user-specified reference and comparison histories of exposure on outcome using linear hypothesis testing
-#' @param object msm object that contains all relevant user inputs
-#' @param data_for_model_with_weights_cutoff imputed datasets with truncated weights
-#' @param all_models fitted models for each imputed dataset
+#' @param model fitted models for each imputed dataset
 #' @param reference optional reference event for custom comparison
 #' @param compare optional comparison event(s) for custom comparison
 #' @importFrom gtools permutations
@@ -15,11 +13,9 @@
 #' @importFrom stats p.adjust
 #' @importFrom knitr kable
 #' @return preds_pool
-#' @examples compareHistories(object, data_for_model_with_weights_cutoff, all_models, reference=NA, compare=NA)

-compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, epochs = NULL, hi_lo_cut = NA, reference = NA, comparison = NULL, mc_comp_method = "BH", dose_level = "h", exp_lab = NA, out_lab = NA, colors = "Dark2" ) {
+compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, epochs = NULL, hi_lo_cut = NA, reference = NA, comparison = NULL, mc_comp_method = "BH", dose_level = "h", exp_lab = NA, out_lab = NA, colors = "Dark2", user.o = TRUE ) {

-  #error checking
   if (!dir.exists(home_dir)) {
     stop("Please provide a valid home directory path.")
   }
@@ -28,15 +24,13 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_
   if (!dir.exists(histories_dir)) {
     dir.create(histories_dir)
   }
-
   plot_dir <- file.path(home_dir, "plots")
   if (!dir.exists(plot_dir)) {
     dir.create(plot_dir)
   }
-
   # exposure_time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2))
-
-  exposure_type <- ifelse(class(model[[1]]$data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")
+  exposure_type <- ifelse(class(model[[1]]$data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric",
+                          "continuous", "binary")

   if( is.null(epochs)){ #making epochs time pts if not specified by user
     epochs <- data.frame(epochs = as.character(exposure_time_pts),
@@ -50,15 +44,17 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_

   #history inspection
-  #print history sample distribution --check this
-  eval_hist(data = model[[1]]$data, exposure, tv_confounders, epochs,
-            time_pts= as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2)),
-            hi_lo_cut, reference, comparison)
+  #print history sample distribution
+  if (user.o == TRUE){
+    eval_hist(data = model[[1]]$data, exposure, tv_confounders, epochs,
+              exposure_time_pts, hi_lo_cut, reference, comparison)
+  }

   # gathering epoch info
   eps <- epochs$epochs

   # creates permutations of high ("h") and low ("l") levels of exposure for each exposure epoch
-  exposure_levels <- apply(gtools::permutations(2, nrow(epochs), c("l", "h"), repeats.allowed = TRUE), 1, paste, sep = "", collapse = "-")
+  exposure_levels <- apply(gtools::permutations(2, nrow(epochs), c("l", "h"),
+                                                repeats.allowed = TRUE), 1, paste, sep = "", collapse = "-")

   exp_epochs <- apply(expand.grid(exposure, as.character(epochs[, 1])), 1, paste, sep = "", collapse = "_")
@@ -83,7 +79,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_
       stop(paste0('If you wish to specify comparison(s), please provide at least one comparison history from the following list ',
                   paste0(apply(gtools::permutations(2, nrow(epochs), c("l", "h"), repeats.allowed = TRUE), 1,
                                paste, sep = "", collapse = "-"), sep = ", ", collapse = " "),
-                  " otherwise, do not specify reference and comparison events to conduct all comparisons."))
+                  " otherwise, do not specify reference or comparison events to conduct all comparisons."))
     }
     comp_histories <- exposure_levels[exposure_levels %in% comparison]
   } else {
@@ -92,6 +88,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_
   ints <- gsub(" ", "", as.character(unlist(strsplit(as.character(unlist(model[[1]]$terms)), "\\+"))))
   ints <- ifelse(sum(grepl(":", ints)) > 0, 1, 0)

+
   # gathering epoch information for each exposure for deriving betas
   epoch_info <- as.data.frame(rep(exposure, length(eps)))
   epoch_info$time <- eps
@@ -101,10 +98,9 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_
   # cycling through eps to find hi and lo values of the exposure for each epoch based on user-specified values
   if (exposure_type == "continuous") {
-
     if (is.null(names(model))){ #imputed data
-      # stacking imputed data to compute hi/lo vals across all
-      data <- lapply(model, function(x) { x$data })[[1]]
+      # stacking imputed data to compute hi/lo vals across all imps
+      data <- lapply(model, function(x) { x$data })
       data <- do.call("rbind", data)
     }
@@ -120,7 +116,6 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_
         epoch_info$high[t] <- as.numeric(median(data[, var_name], na.rm = T))

       } else{
-
         if (length(hi_lo_cut) == 2){
           hi_cutoff <- hi_lo_cut[1]
           lo_cutoff <- hi_lo_cut[2]
@@ -132,7 +127,6 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_
             stop('Please select low cutoff value between 0 and 1')
           }
         } else{
-
           if (hi_lo_cut > 1 || hi_lo_cut < 0) {
             stop('Please select a hi_lo cutoff value between 0 and 1')
           }
@@ -140,8 +134,6 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_
           hi_cutoff <- hi_lo_cut
           lo_cutoff <- hi_lo_cut
         }
-
-
         epoch_info$low[t] <- as.numeric(quantile(data[, var_name], probs = lo_cutoff, na.rm = T))
         epoch_info$high[t] <- as.numeric(quantile(data[, var_name], probs = hi_cutoff, na.rm = T))
       }
@@ -156,7 +148,6 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_
     }
   }

-
   # gather high and low values for each exposure epoch based on quantile values
   d <- data.frame(e = paste(epoch_info[[1]], epoch_info[[2]], sep = "_"),
                   l = epoch_info$low,
@@ -181,14 +172,13 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_

   #STEP 2: CONDUCT HISTORY COMPARISONS
   #conduct all pairwise comparisons if no ref/comparisons were specified by user
-  if (sum(is.na(reference) & is.na(comp_histories))==1){
+  if (sum(is.na(reference) & is.na(comp_histories)) == 1){
     # Pairwise comparisons; don't need to use custom class
     comps <- lapply(preds, function(y){
marginaleffects::hypotheses("pairwise") }) } else{ - comps <- create_custom_contrasts(d, reference, comp_histories, exposure, preds) } @@ -208,7 +198,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ # STEP 1c: Pooling predicted estimates # Pool results - preds_pool <- mice::pool(mice::as.mira(y)) |> summary(digits = 3) + preds_pool <- mice::pool(mice::as.mira(preds)) |> summary(digits = 3) preds_pool <- add_histories(preds_pool, d) @@ -217,26 +207,30 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ # If the user specified reference and comparison groups, subset pred_pool for inspection and plotting if (!is.na(reference) & sum(is.na(comp_histories)) == 0) { preds_pool <- preds_pool %>% dplyr::filter(history %in% c(reference, comp_histories)) - } - cat("\n") - cat("Below are the pooled average predictions by user-specified history:") # Not sure if we need to print this? - print(preds_pool) + if (user.o == TRUE){ + cat("\n") + cat("Below are the pooled average predictions by user-specified history:") # + cat("\n") + cat(knitr::kable(preds_pool, format = 'pipe', digits = 2), sep = "\n") + cat("\n") + + } - # Makes table of average estimates and saves out - sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_estimated_means_", - "_hi=", hi_cutoff, "_lo=", lo_cutoff, ".html")) + # Makes table of pooled average estimates and saves out + sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_estimated_means_hi=", + hi_cutoff, "_lo=", lo_cutoff, ".html")) stargazer::stargazer( - as.data.frame(y), + as.data.frame(preds_pool), type = "html", digits = 4, - column.labels = colnames(y), + column.labels = colnames(preds_pool), summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_estimated_means_", - "_hi=", hi_cutoff, "_lo=", lo_cutoff, ".html" + out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_estimated_means_hi=", + hi_cutoff, "_lo=", lo_cutoff, ".html" ) ) sink() @@ -252,12 +246,32 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ #STEP 3: conduct multiple comparison correction - comps_pool <- perform_multiple_comparison_correction(comps_pool, method) + comps_pool <- perform_multiple_comparison_correction(comps_pool, reference, comp_histories, mc_comp_method) + + if (user.o == TRUE){ + cat("\n") + cat(paste0("USER ALERT: please inspect the following pooled comparisons :"), "\n") + cat(knitr::kable(comps_pool, format = 'pipe', digits = 2), sep = "\n") + cat("\n") + cat("\n") + } - cat(paste0("USER ALERT: please inspect the following comparisons :"), "\n") - cat(knitr::kable(comps_pool, format = 'pipe', digits = 2), sep = "\n") - cat("\n") - cat("\n") + # Makes table of pooled comparisons and saves out + sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_comparisons_hi=", + hi_cutoff, "_lo=", lo_cutoff, ".html")) + stargazer::stargazer( + as.data.frame(comps_pool), + type = "html", + digits = 4, + column.labels = colnames(comps_pool), + summary = FALSE, + rownames = FALSE, + header = FALSE, + out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_comparisons_hi=", + hi_cutoff, "_lo=", lo_cutoff, ".html" + ) + ) + sink() # for plotting @@ -274,17 +288,17 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ # If the user specified reference and comparison groups, subset preds for inspection and plotting if (!is.na(reference) & 
     if (!is.na(reference) & sum(is.na(comp_histories)) == 0) {
-      # preds <- lapply(preds, function(y) {
-      #   as.data.frame(y) %>% dplyr::filter(history %in% c(reference, comp_histories))
-      preds <- preds %>% dplyr::filter(history %in% c(reference, comp_histories))
-
-      # })
+      preds <- preds %>%
+        dplyr::filter(history %in% c(reference, comp_histories))
     }

-    cat("\n")
-    cat("Below are the average predictions by user-specified history:", "\n") # Not sure if we need to print this?
-    print(preds)
-    cat("\n")
+    if (user.o == TRUE){
+      cat("\n")
+      cat("Below are the average predictions by user-specified history:", "\n") # Not sure if we need to print this?
+      cat("\n")
+      cat(knitr::kable(preds, format = 'pipe', digits = 2), sep = "\n")
+      cat("\n")
+    }

     # Makes table of average estimates a
@@ -307,35 +321,44 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_

     #STEP 3: conduct multiple comparison correction
-    comps <- comps[[1]]
     comps <- add_histories(comps, d)
     comps <- add_dose(comps, dose_level)
-    comps <- perform_multiple_comparison_correction(comps, method)
-
-    # comps$term <- lapply(comps$term, round, 3)
-    # lapply(comps$term, function(x){lapply(x, function(y){ round(y, 3)})})
-    # x=comps[[1]]$term
-    # gsub("(", "", as.character(unlist(strsplit(x, " - "))))
-
-    cat("\n")
-    cat(paste0("USER ALERT: please inspect the following comparisons :"), "\n")
-    cat(knitr::kable(comps, format = 'pipe', digits = 2), sep = "\n")
-    cat("\n")
-    cat("\n")
+    comps <- perform_multiple_comparison_correction(comps, reference, comp_histories, mc_comp_method)

+    if (user.o == TRUE){
+      cat("\n")
+      cat(paste0("USER ALERT: please inspect the following comparisons:"), "\n")
+      cat("\n")
+      cat(knitr::kable(comps, format = 'pipe', digits = 2), sep = "\n")
+      cat("\n")
+      cat("\n")
+    }

+    # Makes table of comparisons and saves out
+    sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_comparisons_hi=",
+                hi_cutoff, "_lo=", lo_cutoff, ".html"))
+    stargazer::stargazer(
+      as.data.frame(comps),
+      type = "html",
+      digits = 4,
+      column.labels = colnames(comps),
+      summary = FALSE,
+      rownames = FALSE,
+      header = FALSE,
+      out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_comparisons_hi=",
+                   hi_cutoff, "_lo=", lo_cutoff, ".html"
+      )
+    )
+    sink()
   }
-

   #STEP 4: Plot results
-
   if (is.na(exp_lab)){
     exp_lab <- exposure
   }
@@ -345,7 +368,6 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_

   for (x in seq_along(preds)) {
     comparisons <- data.frame(preds)
-    # comparisons$term <- gsub(paste0(exposure, "_"), "", comparisons$term)
     comparisons$low_ci <- comparisons$estimate - (1.96 * comparisons$std.error)
     comparisons$high_ci <- comparisons$estimate + (1.96 * comparisons$std.error)
     comparisons$history <- as.factor(comparisons$history)
@@ -362,10 +384,11 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_
       ggplot2::geom_errorbarh(aes(xmin = low_ci, xmax = high_ci), height = 0.6) +
       ggplot2::xlab(paste0("Predicted ", out_lab, " Value")) +
       ggplot2::ylab(paste0(exp_lab, " Exposure History")) +
-      ggplot2::xlim(min(comparisons$low_ci) - 1 * sd(comparisons$low_ci), max(comparisons$high_ci) + 1 * sd(comparisons$high_ci)) +
+      ggplot2::xlim(min(comparisons$low_ci) - 1 * sd(comparisons$low_ci),
+                    max(comparisons$high_ci) + 1 * sd(comparisons$high_ci)) +
       ggplot2::theme(text = ggplot2::element_text(size = 14)) +
       ggplot2::theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
-                     panel.background = element_blank(), axis.line = element_line(colour = "black"))
+                     panel.background = element_blank(), axis.line = element_line(colour = "black"))

     ggplot2::ggsave(paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), plot = p)
   }
@@ -374,27 +397,26 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_
     p <- ggplot2::ggplot(data = comparisons, aes(x = estimate, y = history, color = dose)) +
       ggplot2::geom_point(size = 5) +
       scale_colour_brewer(palette = colors) +
-      # ggplot2::scale_color_palette(name = "Dosage") +
       ggplot2::scale_y_discrete(limits = c(as.character(comparisons$history)), expand = c(0, 0.2)) +
       ggplot2::geom_errorbarh(aes(xmin = low_ci, xmax = high_ci), height = 0.6) +
       ggplot2::xlab(paste0("Predicted ", out_lab, " Value")) +
       ggplot2::ylab(paste0(exp_lab, " Exposure History")) +
-      ggplot2::xlim(min(comparisons$low_ci) - 1 * sd(comparisons$low_ci), max(comparisons$high_ci) + 1 * sd(comparisons$high_ci)) +
+      ggplot2::xlim(min(comparisons$low_ci) - 1 * sd(comparisons$low_ci),
+                    max(comparisons$high_ci) + 1 * sd(comparisons$high_ci)) +
       ggplot2::theme(text = element_text(size = 14)) +
       ggplot2::theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
-                     panel.background = element_blank(), axis.line = element_line(colour = "black")) +
+                     panel.background = element_blank(), axis.line = element_line(colour = "black")) +
       ggplot2::guides(fill=guide_legend(title="Dosage"))

     ggplot2::ggsave(paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), plot = p)
   }

-  message("\n")
-  message("See the '/plots/' folder for graphical representations of results.")
+  if (user.o == TRUE){
+    message("\n")
+    message("See the '/plots/' folder for graphical representations of results.")
+  }
 }
-
-  comps
-
 }
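One detail of compareHistories worth flagging for reviewers: the exposure-history labels come from permutations with repetition of "l"/"h" over the epochs, exactly as called in the diff above. A quick standalone sketch for three epochs (note gtools sorts the supplied values, so the labels come out in "h"-first order):

apply(gtools::permutations(2, 3, c("l", "h"), repeats.allowed = TRUE),
      1, paste, collapse = "-")
# eight labels, from "h-h-h" through "l-l-l"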
cat(paste0("For ", exposure, " at exposure time point ", time , ", the following covariate(s) will be added to the short balancing formula: "), paste(new, collapse = ", "), "\n") + cat("\n") } }else{ if (user.o == TRUE) { cat(paste0("For ", exposure, " at exposure time point ", time , " no time-varying confounders at additional lags were added."), "\n") + cat("\n") } } } @@ -126,9 +128,15 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_co stop("The variables in the keep_conf field must be included in tv_confounders.") } keep_conf <- keep_conf[!paste0(exposure, ".", time) %in% keep_conf] - vars_to_include <- c(vars_to_include, keep_conf[as.numeric(sapply(strsplit(keep_conf, "\\."), "[", 2)) < time]) + + if (length(keep_conf[as.numeric(sapply(strsplit(keep_conf, "\\."), "[", 2)) < time]) > 0){ + if(! keep_conf[as.numeric(sapply(strsplit(keep_conf, "\\."), "[", 2)) < time] %in% vars_to_include){ + vars_to_include <- c(vars_to_include, keep_conf[as.numeric(sapply(strsplit(keep_conf, "\\."), "[", 2)) < time]) + } + } } + # Creates form for the given exposure time point f <- as.formula(paste(paste0(exposure, ".", time, " ~ "), paste0(vars_to_include[order(vars_to_include)], sep = "", collapse = " + "))) diff --git a/R/createWeights.R b/R/createWeights.R index 47d9aecb..48e5d70b 100644 --- a/R/createWeights.R +++ b/R/createWeights.R @@ -1,9 +1,6 @@ #' Creates balancing weights for potential confounding covariates in relation to exposure at each time point #' #' Returns a list of weights_models -#' @param object msm object that contains all relevant user inputs -#' @param wide_long_datasets from formatForWeights -#' @param short_forms from createShortForms #' @param read_in_from_file optional binary indicator of whether weights should be read in from a local file #' @return weights_models #' @export @@ -12,34 +9,24 @@ #' @importFrom ggplot2 ggsave #' @importFrom WeightIt weightitMSM #' @importFrom cobalt bal.tab -#' @seealso [CBPS::CBPS()], [formatForWeights()], [createShortForms()] -#' @examples createWeights(object, wide_long_datasets, short_forms, read_in_from_file="no") createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, formulas, method = "cbps", read_in_from_file = "no", user.o = TRUE) { - #error checking if (!dir.exists(home_dir)) { stop("Please provide a valid home directory path.") } - if(! 
method %in% c("ps", "glm", "gbm", "bart", "super", "cbps")){ stop("Please provide a weights method from this list: 'ps', 'glm', 'gbm', 'bart', 'super', 'cbps'.") } - if (!class(data) %in% c("mids", "data.frame", "character")) { stop("Please provide either a 'mids' object, a data frame, or a directory with imputed csv files in the 'data' field.") } # Load required libraries library(cobalt) - library(ggplot2) - library(WeightIt) - - # Extracting arguments from the msm object exposure_time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)], "\\."), "[",2)) time_varying_covariates <- tv_confounders weights_method <- method - # exposure_type <- if (length(unique(data[, paste0(exposure, ".", exposure_time_pts[1])])) < 3) "binary" else "continuous" form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[",1) # creating directories @@ -56,11 +43,9 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for dir.create(hist_dir) } - if (read_in_from_file == "yes") { tryCatch({ - weights = readRDS(paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_", weights_method, "_fit.rds")) - # saveRDS(weights, file = paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_", weights_method, "_fit.rds")) + weights <- readRDS(paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_", weights_method, "_fit.rds")) if (user.o == TRUE){ message("Reading in balancing weights from the local folder.") @@ -86,7 +71,7 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for stabilize = TRUE, density = "dt_2", use.kernel = TRUE, - weightit.force = TRUE, + include.obj = TRUE, over = FALSE) fit } @@ -111,13 +96,15 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for cat('\n') # Save weights merged with ID variable - write.csv(x = d, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", form_name, "_", weights_method, "_", i, ".csv")) + write.csv(x = d, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", form_name, + "_", weights_method, "_", i, ".csv")) # Writes image of the histogram of weights to assess heavy tails ggplot(data = as.data.frame(fit$weight), aes(x = fit$weight)) + geom_histogram(color = 'black', bins = 15) - ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, "_", form_name, "_", weights_method, "_", i, ".png"), - height = 8, width = 14) + + ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, "_", form_name, + "_", weights_method, "_", i, ".png"), height = 8, width = 14) fit }) @@ -125,7 +112,7 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for if (user.o == TRUE){ cat("Weights for each imputation have now been saved into the 'weights/values/' folder.") cat("\n") - cat("Weights histograms for each imputation have now been saved in the 'weights/histograms/' folder --likely has heavy tails.") + cat("Weights histograms for each imputation have now been saved in the 'weights/histograms/' folder --likely have heavy tails.") } } @@ -158,17 +145,24 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for d$weights <- fit$weights - message(paste0("USER ALERT: For imputation", i, " and ", weights_method, ", weighting method, the median weight value is ", round(median(fit$weights),2) , - " (SD= ", round(sd(fit$weights),2), "; range= ", round(min(fit$weights),2), "-", round(max(fit$weights),2), 
")."), "\n") - cat('\n') + if (user.o == TRUE){ + message(paste0("USER ALERT: For imputation", i, " and ", weights_method, + ", weighting method, the median weight value is ", round(median(fit$weights),2) , + " (SD= ", round(sd(fit$weights),2), "; range= ", round(min(fit$weights),2), "-", + round(max(fit$weights),2), ")."), "\n") + cat('\n') + } # Save weights merged with ID variable - write.csv(x = d, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", form_name, "_", weights_method, "_", i, ".csv")) + write.csv(x = d, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, + "_", form_name, "_", weights_method, "_", i, ".csv")) # Writes image of the histogram of weights to assess heavy tails ggplot2::ggplot(data = as.data.frame(fit$weight), aes(x = fit$weight)) + ggplot2::geom_histogram(color = 'black', bins = 15) - ggplot2::ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, "_", form_name, "_", weights_method, "_", i, ".png"), + + ggplot2::ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, + "_", form_name, "_", weights_method, "_", i, ".png"), height = 8, width = 14) fit @@ -196,20 +190,25 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for data$weights <- weights[[1]]$weights - message(paste0("USER ALERT: for the ", weights_method, " weighting method, the median weight value is ", round(median(data$weights), 2) , - " (SD= ", round(sd(data$weights), 2), "; range= ", round(min(data$weights), 2), "-", round(max(data$weights), 2), ")."), "\n") + if (user.o == TRUE){ + message(paste0("USER ALERT: for the ", weights_method, " weighting method, the median weight value is ", + round(median(data$weights), 2) , " (SD= ", round(sd(data$weights), 2), "; range= ", + round(min(data$weights), 2), "-", round(max(data$weights), 2), ")."), "\n") cat('\n') + } names(weights) <- "0" # Save weights merged with ID variable - write.csv(x = data, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", form_name, "_", weights_method, ".csv")) + write.csv(x = data, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, + "_", form_name, "_", weights_method, ".csv")) # Writes image of the histogram of weights to assess heavy tails - ggplot2::ggplot(data = as.data.frame(fit$weight), aes(x = fit$weight)) + + ggplot2::ggplot(data = as.data.frame(weights[[1]]$weights), aes(x = weights[[1]]$weights)) + ggplot2::geom_histogram(color = 'black', bins = 15) - ggplot2::ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, "_", form_name, "_", weights_method, ".png"), - height = 8, width = 14) + + ggplot2::ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, + "_", form_name, "_", weights_method, ".png"), height = 8, width = 14) if (user.o == TRUE){ cat("Weights have now been saved into the 'weights/values/' folder.") @@ -218,7 +217,8 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for } } - saveRDS(weights, file = paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_", weights_method, "_fit.rds")) + saveRDS(weights, file = paste0(home_dir, "/weights/", exposure, "-", outcome, + "_", form_name, "_", weights_method, "_fit.rds")) if (user.o == TRUE){ message("Weights models have been saved as an .rds object in the 'weights' folder.") diff --git a/R/eval_hist.R b/R/eval_hist.R index 72ce74e5..834b1203 100644 --- a/R/eval_hist.R +++ b/R/eval_hist.R @@ -2,8 +2,10 @@ 
#function to evaluate distribution of sample in histories eval_hist <- function(data, exposure, tv_confounders, epochs, time_pts, hi_lo_cut, ref, comps){ + exposure_type <- ifelse(class(data[, paste0(exposure, '.', time_pts[1])]) == "numeric", "continuous", "binary") + epochs$epochs <- as.character(epochs$epochs) - time_varying_wide <- apply(expand.grid(time_varying_covariates, as.character(time_pts)), 1, paste, sep = "", collapse = ".") + time_varying_wide <- tv_confounders time_varying_wide <- sort(time_varying_wide) time_varying_wide <- c("ID", time_varying_wide) data_wide <- data @@ -33,7 +35,46 @@ eval_hist <- function(data, exposure, tv_confounders, epochs, time_pts, hi_lo_cu tot_hist <- tot_hist[tot_hist %in% c(ref, comps)] } + if (exposure_type == "continuous"){ + if(is.null(hi_lo_cut)){ + epoch_info$low[t] <- as.numeric(median(data[, var_name], na.rm = T)) + epoch_info$high[t] <- as.numeric(median(data[, var_name], na.rm = T)) + + new$history <- lapply(1:nrow(new), function(x) { + paste(lapply(1:nrow(epochs), function(y) { + if (is.na(new[x, y + 1])) { + return(NA) + } + if (new[x, y + 1] >= as.numeric(median(new[, y + 1], na.rm = TRUE))) { + return("h") + } + if (new[x, y + 1] <= as.numeric(median(new[, y + 1], na.rm = TRUE))) { + return("l") + } + }), collapse = "-") + }) + + } else{ + if (length(hi_lo_cut) == 2){ + hi_cutoff <- hi_lo_cut[1] + lo_cutoff <- hi_lo_cut[2] + + if (hi_cutoff > 1 || hi_cutoff < 0) { + stop('Please select a high cutoff value between 0 and 1') + } + if (lo_cutoff > 1 || lo_cutoff < 0) { + stop('Please select a low cutoff value between 0 and 1') + } + } else{ + if (hi_lo_cut > 1 || hi_lo_cut < 0) { + stop('Please select a hi_lo cutoff value between 0 and 1') + } + hi_cutoff <- hi_lo_cut + lo_cutoff <- hi_lo_cut + } + } + new$history <- lapply(1:nrow(new), function(x) { paste(lapply(1:nrow(epochs), function(y) { if (is.na(new[x, y + 1])) { @@ -46,7 +87,9 @@ eval_hist <- function(data, exposure, tv_confounders, epochs, time_pts, hi_lo_cu return("l") } }), collapse = "-") - })} + }) + + } if (exposure_type == "binary"){ new$history <- lapply(1:nrow(new), function(x) { @@ -68,19 +111,24 @@ eval_hist <- function(data, exposure, tv_confounders, epochs, time_pts, hi_lo_cu dplyr::group_by(history) %>% dplyr::summarize(n = dplyr::n()) + if( !is.na(ref) & !is.null(comps)){ + his_summ <- his_summ %>% + dplyr::filter(history %in% c(ref, comps)) + } + his_summ <- his_summ[! grepl("NA", his_summ$history),] his_summ <- his_summ[! grepl("NULL", his_summ$history),] - message("For the following exposure epochs:") - print(epochs) + # cat("For the following exposure epochs,") + # cat(paste0(paste(epochs$epochs, collapse = ", "), " at ", paste(epochs$values, collapse = ", "), ":"), "\n") - message(paste0("USER ALERT: Out of the total of ", nrow(data_wide), " individuals in the sample, below is the distribution of the ", sum(his_summ$n), " (", + cat(paste0("USER ALERT: Out of the total of ", nrow(data_wide), " individuals in the sample, below is the distribution of the ", sum(his_summ$n), " (", round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), "%) that fall into ", nrow(his_summ), " out of the ", length(tot_hist), " total user-defined exposure histories created from ", hi_lo_cut[2] * 100, "th and ", hi_lo_cut[1] * 100, "th percentile values for low and high levels of exposure ", exposure, - ", respectively, across ", paste(epochs$epochs, collapse = ", "), + ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") - ". 
Please inspect the distribution of the sample across the following exposure histories and ensure there is sufficient spread to avoid extrapolation and low precision:"), "\n") + cat("Please inspect the distribution of the sample across the following exposure histories and ensure there is sufficient spread to avoid extrapolation and low precision:", "\n") if (nrow(his_summ) != length(tot_hist)) { cat(paste0("USER ALERT: There are no individuals in your sample that fall into ", @@ -89,9 +137,11 @@ eval_hist <- function(data, exposure, tv_confounders, epochs, time_pts, hi_lo_cu cat("\n") } + cat("\n") - cat(knitr::kable(his_summ, caption = paste0("Summary of User-Specified Exposure ", exposure, " Histories Based on Exposure Epochs ", - paste(epochs$epochs, collapse = ", "), " containing time points ", paste(epochs$values, collapse = ", "), + cat(knitr::kable(his_summ, caption = paste0("Summary of User-Specified Exposure ", exposure, + " Histories Based on Exposure Epochs ", paste(epochs$epochs, collapse = ", "), + " containing time points ", paste(epochs$values, collapse = ", "), " and the following high/low cutoffs: ", paste(hi_lo_cut, collapse = " & ")), format = 'pipe', row.names = F), sep = "\n") diff --git a/R/fitModel.R b/R/fitModel.R index 42bf23cc..309280ed 100644 --- a/R/fitModel.R +++ b/R/fitModel.R @@ -1,8 +1,5 @@ #' Fit weighted model #' This code fits a weighted marginal structural model to examine the effects of different exposure histories on outcome -#' @param msm_object msm object that contains all relevant user inputs -#' @param data_for_model_with_weights_cutoff dataset with truncated weights see truncateWeights -#' @param balance_stats_final final bal stats conducted w/ full forms #' @param model user-specified model #' @return fits #' @export @@ -11,27 +8,18 @@ #' @importFrom dplyr mutate filter select #' @return fits #' @export -#' @seealso [truncateWeights()], [asesssBalance()] -#' @examples fitModel(object, data_for_model_with_weights_cutoff, balance_stats_final, model="m3") - fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model, family = gaussian, link = "identity", int_order = NA, covariates = NULL, epochs = NULL, user.o = TRUE) { - # Error checking if (!(model %in% c("m0", "m1", "m2", "m3"))) { stop('Please provide a valid model "m" from 0-3 (e.g., "m1")') } - - # exposure_time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2)) - - #error checking if (!dir.exists(home_dir)) { stop("Please provide a valid home directory path.") } if (!class(data) %in% c("mids", "data.frame", "character")) { stop("Please provide either a 'mids' object, a data frame, or a directory with imputed csv files in the 'data' field.") } - if (!(model %in% c("m0", "m1", "m2", "m3"))) { stop('Please provide a valid model "m" from 0-3 (e.g., "m1")') } @@ -45,7 +33,6 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco warning("Please only include covariates that are time invariant or measured at the first exposure time point.") } - weights_method <- weights[[1]]$method #create models directory @@ -74,10 +61,6 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco } - - - - # Lists out exposure-epoch combos if( is.null(epochs)){ #making epochs time pts if not specified by user epochs <- data.frame(epochs = as.character(time_pts), @@ -86,78 +69,36 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco exp_epochs 
<- apply(expand.grid(exposure, as.character(epochs[, 1])), 1, paste, sep = "", collapse = "_") + #getting null comparison + if (model == "m0") {n <- "int"} + if (model == "m1") {n <- "covs"} + if (model == "m2") {n <- "int"} + if (model == "m3") {n <- "covs"} + # if (family == "gaussian"){ + l <- link + family <- family(link = l) + assign("fam", family, envir = .GlobalEnv) + # family <- family(link = l) + # fam <- family(link = l) + # } - getModel <- function(d, outcome, epochs, exp_epochs, int_order, model, fam, link, covariates, interactions){ - if (sum(duplicated(d$"ID")) > 0){ - stop("Please provide wide data with a single row per ID.") - } - - # exposure epochs - if( is.null(epochs)){ #making epochs time pts if not specified by user - epochs <- data.frame(epochs = as.character(exposure_time_pts), - values = exposure_time_pts) - } else { #add epochs by averaging exposure time points - #adds exposure epochs - - #calculates the mean value for each exposure for each exposure epoch - for (e in 1:nrow(epochs)){ - epoch <- epochs[e,1] - temp <- data.frame(row.names = 1:nrow(d)) - new_var <- paste0(exposure, "_", epoch) - if (! new_var %in% colnames(d)){ - #finds data from each time point in each epoch, horizontally aligns all exposure values within the epoch for averaging - for (l in 1:length(as.numeric(unlist(epochs[e,2])))){ - level <- as.numeric(unlist(epochs[e,2]))[l] - z <- d[,which(grepl(paste0(exposure, ".", level), names(d)))] - temp <- cbind(temp, z) - } - #adds a new variable of the exposure averaged within epoch - d <- d %>% dplyr::mutate(!!new_var := rowMeans(temp, na.rm=T)) - d[,new_var] <- as.numeric(d[,new_var]) - # d <- cbind(d, new) - } - } - - } - - # Covariate models checking - if (model == "m1" | model == "m3") { - if (sum(covariates %in% colnames(d)) < length(covariates)){ - stop("Please only include covariates that correspond to variables in the wide dataset.") - } - - covariate_list <- paste(as.character(covariates), sep = "", collapse = " + ") - } else{ - covariate_list <- NULL - - covariate_list <- paste(as.character(covariates), sep = "", collapse = " + ") - } + if (class(data) == "mids"){ #imputed dataset + fits <- lapply(1:data$m, function(y) { + d <- complete(data, y) + d$weights <- NULL + d$weights <- weights[[y]]$weights + getModel(d, outcome, epochs, exp_epochs, int_order, model, family, covariates, interactions, user.o) + }) - # interaction model checking - if (model == "m2" | model == "m3"){ - if (int_order > nrow(epochs)){ - stop("Please provide an interaction order equal to or less than the total number of epochs/time points.") - } - interactions <- paste( - lapply(2:int_order, function(z) { - paste(apply(combn(exp_epochs, z), 2, paste, sep = "", collapse = ":"), sep = "", collapse = " + ") - }), - collapse = " + " - ) - #create interactions in data - for (x in 1:length(unlist(strsplit(interactions, "\\+")))) { - name <- gsub(" ", "", unlist(strsplit(interactions, "\\+"))[x]) - if (! name %in% colnames(d)){ - temp <- d[, c(gsub(" ", "", as.character(unlist(strsplit(unlist(strsplit(interactions, "\\+"))[x], "\\:"))))) ] - d <- d %>% dplyr::mutate(!! 
name := matrixStats::rowProds(as.matrix(temp), na.rm=T)) - } - } - } else { - interactions <- NULL - } + fits.null <- lapply(1:data$m, function(y) { + d <- complete(data, y) + d$weights <- NULL + d$weights <- weights[[y]]$weights + getModel(d, outcome, epochs, exp_epochs, int_order, model = n, family, covariates, interactions, user.o) + }) if (user.o == TRUE){ cat("Please inspect the following likelihood ratio test to determine if the exposures collectively predict significant variation in the outcome compared to a model without exposure terms.", "\n") cat("\n") cat("We strongly suggest not conducting history comparisons if the likelihood ratio test is non-significant.", "\n") cat("\n") } - s <- survey::svydesign( - id = ~1, # - data = d, # Adds list of imputation data? - weights = ~ weights - ) - - # Fitting baseline model w/ main effects only (m0) for all models - f0 <- paste(outcome, "~", paste0(exp_epochs, sep = "", collapse = " + ")) - m0 <- survey::svyglm(as.formula(f0), family = fam(link = link), design = s) # List of model fitted to all imputed datasets - - if (model == "m0") { - - null <- survey::svyglm(as.formula(paste(outcome, "~", 1)), family = fam(link = link), design = s) - print(anova(null, m0)) - - return(m0) # Save model - - } else { - - # Baseline + sig covar model OR baseline + sig covar + int model - if (model == "m1" | model == "m3") { - f1 <- paste(f0, "+", covariate_list) # Baseline + covariate model - m1 <- survey::svyglm(as.formula(f1), family = fam(link = link), design = s) - - # Baseline + imbalanced covars - if (model == "m1") { - - null <- survey::svyglm(as.formula(paste(outcome, "~ 1 + ", covariate_list)), family = fam(link = link), design = s) - print(anova(null, m1)) + print(mitml::testModels(fits, fits.null)) + cat("\n") - return(m1) - } - } - - # Baseline + interaction OR baseline + covars + interactions - if (model == "m2" | model == "m3") { - f2 <- paste(f0, "+", paste(interactions, sep = "", collapse = " + ")) - - # Baseline + interactions - if (model == "m2") { - # Fitting m2 - m2 <- survey::svyglm(as.formula(f2), family = fam(link = link), design = s) - # Baseline + interactions - - null <- survey::svyglm(as.formula(paste(outcome, "~ 1")), family = fam(link = link), design = s) - print(anova(null, m2)) - - return(m2) - } - - # Baseline + covars + interactions - if (model == "m3") { - # Fitting m3 - f3 <- paste0(f1, "+", paste(interactions, sep = "", collapse = " + ")) - m3 <- survey::svyglm(as.formula(f3), family = fam(link = link), design = s) - # Baseline + covars+ interactions - - null <- survey::svyglm(as.formula(paste(outcome, "~ 1 + ", covariate_list)), family = fam(link = link), design = s) - print(anova(null, m3)) - - # stack= complete(data, "long") - # # colnames(stack)[colnames(stack) == ".imp"] <- "impvar" - # psfmi::pool_D4(data = stack, fm1 = f3, nimp = 5, fm0 = as.formula(paste(outcome, "~ 1 + ", covariate_list)), impvar = ".imp") - - - return(m3) - } - } - } } - if (class(data) == "mids"){ #imputed dataset - fits <- lapply(1:data$m, function(y) { - d <- complete(data, y) + if (class(data) == "list"){ #imputed dataset + fits <- lapply(1:length(data), function(y) { + d <- data[[y]] d$weights <- NULL d$weights <- weights[[y]]$weights - - getModel(d, outcome, epochs, exp_epochs, int_order, model, fam, link, covariates, interactions) - - - + getModel(d, outcome, epochs, exp_epochs, int_order, model, family, covariates, interactions, user.o) }) - } - - if (class(data) == "list"){ #imputed dataset - fits <- lapply(1:length(data), function(y) { + fits.null <- 
lapply(1:length(data), function(y) { d <- data[[y]] d$weights <- NULL d$weights <- weights[[y]]$weights + getModel(d, outcome, epochs, exp_epochs, int_order, model = n, family, covariates, interactions, user.o) + }) + + if (user.o == TRUE){ + cat("Please inspect the following likelihood ratio test to determine if the exposures collectively predict significant variation in the outcome compared to a model without exposure terms.", "\n") + cat("\n") + cat("We strongly suggest not conducting history comparisons if the likelihood ratio test is non-significant.", "\n") + cat("\n") + } - getModel(d, outcome, epochs, exp_epochs, int_order, model, fam, link, covariates, interactions) + print(mitml::testModels(fits, fits.null)) + cat("\n") - }) } - - if (class(data) == "data.frame"){ #imputed dataset + if (class(data) == "data.frame"){ #df fits <- lapply(1, function(y) { d <- data d$weights <- NULL d$weights <- weights[["0"]]$weights + getModel(d, outcome, epochs, exp_epochs, int_order, model, family, covariates, interactions, user.o) + }) - getModel(d, outcome, epochs, exp_epochs, int_order, model, fam, link, covariates, interactions) + fits.null <- lapply(1, function(y) { + d <- data + d$weights <- NULL + d$weights <- weights[["0"]]$weights + getModel(d, outcome, epochs, exp_epochs, int_order, model = n, family, covariates, interactions, user.o) }) + + if (user.o == TRUE){ + cat("Please inspect the following likelihood ratio test to determine if the exposures collectively predict significant variation in the outcome compared to a model without exposure terms.", "\n") + cat("\n") + cat("We strongly suggest not conducting history comparisons if the likelihood ratio test is non-significant.", "\n") + cat("\n") + } + + print(anova(fits[[1]], fits.null[[1]])) + names(fits) <- "0" + cat("\n") + } - names(fits) = "0" - if (user.o == TRUE){ - if (class(data) == "mids" | class(data) == "list"){ + + if (class(data) == "mids" | class(data) == "list"){ + if (user.o == TRUE){ cat(paste0("USER ALERT: the marginal model, ", model, ", run for each imputed dataset is summarized below:"), "\n") + } - suppressWarnings(jtools::export_summs( - fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), - model.names = c(paste0("Imp.", 1:length(fits))), - file.name = file.path(home_dir, "/models/", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")) - )) + suppressWarnings(jtools::export_summs( + fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), + model.names = c(paste0("Imp.", 1:length(fits))), + file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")) + )) - } else{ + } else{ + if (user.o == TRUE){ cat(paste0("USER ALERT: the marginal model, ", model, ", is summarized below:"), "\n") + } - suppressWarnings(jtools::export_summs( - fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), - file.name = file.path(home_dir, "/models/", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")) - )) + suppressWarnings(jtools::export_summs(fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), + file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")) + )) - } - names(fits) <- 1:length(fits) - print(lapply(fits, function(x) {summary(x)})) - names(fits) <- NULL + } + + names(fits) <- 1:length(fits) + if (user.o == TRUE){ + cat("\n") + print(lapply(fits, function(x) {summary(x)})) + 
cat("\n") cat("Tables of model evidence have now been saved in the 'models/' folder.\n") } + names(fits) <- NULL + saveRDS(fits, file = file.path(home_dir, "/models/", paste0(exposure, "-", outcome, "_", model, "_model.rds"))) cat("\n") diff --git a/R/formatLongData.R b/R/formatLongData.R index 06abb93f..0fc88aa4 100644 --- a/R/formatLongData.R +++ b/R/formatLongData.R @@ -2,11 +2,8 @@ formatLongData <- function(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, time_var = NA, id_var = NA, missing = NA, factor_confounders = NULL){ time_varying_covariates <- tv_confounders - # exposure_time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2)) - options(readr.num_columns = 0) - # Error checking} if (!dir.exists(home_dir)) { stop('Please provide a valid home directory.') } @@ -33,9 +30,11 @@ formatLongData <- function(home_dir, data, exposure, exposure_time_pts, outcome, exposure_summary <- data %>% dplyr::filter(WAVE %in% exposure_time_pts) %>% dplyr::group_by(WAVE) %>% - dplyr::summarize_at(dplyr::vars(all_of(exposure)), list(mean = mean, sd = sd, min = min, max = max), na.rm = TRUE) + dplyr::summarize_at(dplyr::vars(all_of(exposure)), + list(mean = mean, sd = sd, min = min, max = max), na.rm = TRUE) - cat(knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"), format = 'pipe'), sep = "\n") + cat(knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, + " Exposure Information"), format = 'pipe'), sep = "\n") knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"), format = 'html') %>% kableExtra::kable_styling() %>% kableExtra::save_kable(file = file.path(home_dir, paste0(exposure, "_exposure_info.html"))) @@ -48,9 +47,11 @@ formatLongData <- function(home_dir, data, exposure, exposure_time_pts, outcome, outcome <- sapply(strsplit(outcome, "\\."), "[",1) outcome_summary <- data %>% dplyr::group_by(WAVE) %>% - dplyr::summarize_at(dplyr::vars(all_of(outcome)), list(mean = mean, sd = sd, min = min, max = max), na.rm = TRUE) + dplyr::summarize_at(dplyr::vars(all_of(outcome)), + list(mean = mean, sd = sd, min = min, max = max), na.rm = TRUE) cat(knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ", outcome, " Information"), format = 'pipe'), sep = "\n") + knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ", outcome, " Information"), format = 'html') %>% kableExtra::kable_styling() %>% kableExtra::save_kable(file = file.path(home_dir, paste0(outcome, "_outcome_info.html"))) @@ -69,9 +70,7 @@ formatLongData <- function(home_dir, data, exposure, exposure_time_pts, outcome, # Formatting numeric covariates numeric_vars <- colnames(data)[!colnames(data) %in% c(factor_covariates, id)] data[, numeric_vars] <- lapply(data[, numeric_vars], as.numeric) - } as.data.frame(data) - } diff --git a/R/getModel.R b/R/getModel.R new file mode 100644 index 00000000..8190fa7d --- /dev/null +++ b/R/getModel.R @@ -0,0 +1,140 @@ + +getModel <- function(d, outcome, epochs, exp_epochs, int_order, model, fam, covariates, interactions, user.o){ + + if (sum(duplicated(d$"ID")) > 0){ + stop("Please provide wide data with a single row per ID.") + } + + + # exposure epochs + if( is.null(epochs)){ #making epochs time pts if not specified by user + epochs <- data.frame(epochs = as.character(exposure_time_pts), + values = exposure_time_pts) + } else { #add epochs by averaging exposure time points + #adds exposure epochs + #calculates 
the mean value for each exposure for each exposure epoch + for (e in 1:nrow(epochs)){ + epoch <- epochs[e,1] + temp <- data.frame(row.names = 1:nrow(d)) + new_var <- paste0(exposure, "_", epoch) + if (! new_var %in% colnames(d)){ + #finds data from each time point in each epoch, horizontally aligns all exposure values within the epoch for averaging + for (l in 1:length(as.numeric(unlist(epochs[e,2])))){ + level <- as.numeric(unlist(epochs[e,2]))[l] + z <- d[,which(grepl(paste0(exposure, ".", level), names(d)))] + temp <- cbind(temp, z) + } + #adds a new variable of the exposure averaged within epoch + d <- d %>% dplyr::mutate(!!new_var := rowMeans(temp, na.rm=T)) + d[,new_var] <- as.numeric(d[,new_var]) + } + } + } + + + # Covariate models checking + if (model == "m1" | model == "m3" | model == "covs") { + if (sum(covariates %in% colnames(d)) < length(covariates)){ + stop("Please only include covariates that correspond to variables in the wide dataset.") + } + covariate_list <- paste(as.character(covariates), sep = "", collapse = " + ") + } else{ + covariate_list <- NULL + } + + + # interaction model checking + if (model == "m2" | model == "m3"){ + if (int_order > nrow(epochs)){ + stop("Please provide an interaction order equal to or less than the total number of epochs/time points.") + } + interactions <- paste( + lapply(2:int_order, function(z) { + paste(apply(combn(exp_epochs, z), 2, paste, sep = "", collapse = ":"), sep = "", collapse = " + ") + }), + collapse = " + " + ) + #create interactions in data + for (x in 1:length(unlist(strsplit(interactions, "\\+")))) { + name <- gsub(" ", "", unlist(strsplit(interactions, "\\+"))[x]) + if (! name %in% colnames(d)){ + temp <- d[, c(gsub(" ", "", as.character(unlist(strsplit(unlist(strsplit(interactions, "\\+"))[x], "\\:"))))) ] + d <- d %>% dplyr::mutate(!! 
name := matrixStats::rowProds(as.matrix(temp), na.rm=T)) + } + } + } else { + interactions <- NULL + } + + + s <- survey::svydesign( + id = ~1, + data = d, + weights = ~ weights + ) + + + #Null models + # Fitting intercept-only model + if (model == "int"){ + fi <- paste(outcome, "~ 1") + mi <- survey::svyglm(as.formula(fi), family = fam, design = s) # List of model fitted to all imputed datasets + return(mi) + } + + if (model == "covs"){ + fc <- paste(outcome, "~", covariate_list) + mc <- survey::svyglm(as.formula(fc), family = fam, design = s) # List of model fitted to all imputed datasets + return(mc) + } + + + + # Fitting baseline model w/ main effects only (m0) for all models + f0 <- paste(outcome, "~", paste0(exp_epochs, sep = "", collapse = " + ")) + m0 <- survey::svyglm(as.formula(f0), family = fam, design = s) # List of model fitted to all imputed datasets + + if (model == "m0") { + + return(m0) # Save model + + } else { + + # Baseline + sig covar model OR baseline + sig covar + int model + if (model == "m1" | model == "m3") { + f1 <- paste(f0, "+", covariate_list) # Baseline + covariate model + m1 <- survey::svyglm(as.formula(f1), family = fam, design = s) + + # Baseline + imbalanced covars + if (model == "m1") { + + return(m1) + } + } + + # Baseline + interaction OR baseline + covars + interactions + if (model == "m2" | model == "m3") { + f2 <- paste(f0, "+", paste(interactions, sep = "", collapse = " + ")) + + # Baseline + interactions + if (model == "m2") { + # Fitting m2 + m2 <- survey::svyglm(as.formula(f2), family = fam, design = s) + # Baseline + interactions + + return(m2) + } + + # Baseline + covars + interactions + if (model == "m3") { + # Fitting m3 + f3 <- paste0(f1, "+", paste(interactions, sep = "", collapse = " + ")) + f3 <- as.formula(f3) + m3 <- survey::svyglm(f3, family = fam, design = s) + # Baseline + covars+ interactions + + return(m3) + } + } + } +} diff --git a/R/imputeData.R b/R/imputeData.R index 81bc3b9e..646ca855 100644 --- a/R/imputeData.R +++ b/R/imputeData.R @@ -1,7 +1,6 @@ #' Imputes dataset so there is no missing at each time point using parallel processing to speed up #' #' Creates m imputed datasets from original datasets using mice::mice -#' @param msm_object msm object that contains all relevant user inputs #' @param data_to_impute output from dataToImpute #' @return imputed_datasets imputation results #' @export @@ -19,11 +18,8 @@ #' @importFrom purrr map_dfr #' @importFrom foreach getDoParWorkers #' @importFrom foreach getDoParName -#' @examples imputeData(msm_object, data_to_impute, read_imps_from_file = "no") #' imputeData <- function(data, m, method, home_dir, exposure, outcome, tv_confounders, ti_confounders, read_imps_from_file="no") { - - # Error checking} if (!dir.exists(home_dir)) { stop('Please provide a valid home directory.') } @@ -32,7 +28,6 @@ imputeData <- function(data, m, method, home_dir, exposure, outcome, tv_confound if (read_imps_from_file == "yes") { imputed_datasets <- list() - # Error checking if (!file.exists(glue::glue("{home_dir}/imputations/{exposure}-{outcome}_all_imp.rds"))) { stop("Imputations have not been created and saved locally. 
Please set 'read_imps_from_file' == 'no' and re-run.") } @@ -47,16 +42,15 @@ imputeData <- function(data, m, method, home_dir, exposure, outcome, tv_confound } else { - #error checking if (sum(duplicated(data$"ID")) > 0){ stop("Please provide a wide dataset with a single row per ID.") } - library(mice) - library(doParallel) - library(doRNG) - library(purrr) - library(tibble) + # library(mice) + # library(doParallel) + # library(doRNG) + # library(purrr) + # library(tibble) imp_method <- method time_varying_covariates <- tv_confounders @@ -81,7 +75,7 @@ imputeData <- function(data, m, method, home_dir, exposure, outcome, tv_confound # Conducts imputations using parallelized execution cycling through m imputed_datasets <- foreach(i = seq_len(m), .combine = mice::ibind) %dorng% { cat("### Started iteration", i, "\n") - miceout <- mice::mice(data_to_impute_full, m=1, method=imp_method, maxit = 0, + miceout <- mice::mice(data_to_impute_full, m = 1, method = imp_method, maxit = 0, print = F) cat("### Completed iteration", i, "\n") miceout diff --git a/R/inspectData.R b/R/inspectData.R index 3c59ffc7..d207d0b8 100644 --- a/R/inspectData.R +++ b/R/inspectData.R @@ -1,5 +1,5 @@ -inspectData <-function(data, home_dir, exposure, outcome, tv_confounders, ti_confounders, epochs = NULL, hi_lo_cut = NULL){ +inspectData <-function(data, home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, epochs = NULL, hi_lo_cut = NULL, reference = NA, comparison = NULL){ # Error checking if (!class(data) %in% c("mids", "data.frame", "character")) { @@ -11,23 +11,10 @@ inspectData <-function(data, home_dir, exposure, outcome, tv_confounders, ti_con time_var_covars <- tv_confounders time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2)) - if (exposure_type == "continuous"){ - if (is.null(hi_lo_cut)){ - hi_lo_cut <- c(0.75, 0.25) - } - } - - # - # Load necessary packages - library(readr) - library(dplyr) - library(tidyr) - if (class(data) == "mids"){ data <-as.data.frame(mice::complete(data,1)) } - if (class(data) == "character") { if (!dir.exists(data)) { stop("Please provide a valid directory path with imputed datasets, a data frame, or a 'mids' object for the 'data' field.") @@ -55,14 +42,19 @@ inspectData <-function(data, home_dir, exposure, outcome, tv_confounders, ti_con exposure_type <- ifelse(class(data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary") + if (exposure_type == "continuous"){ + if (is.null(hi_lo_cut)){ + hi_lo_cut <- c(0.75, 0.25) + } + } + # Exposure summary exposure_summary <- data %>% dplyr:: select(colnames(data)[grepl(exposure, colnames(data))]) exposure_summary <- psych::describe(exposure_summary, fast = TRUE) - - cat(knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"), format = 'pipe'), sep = "\n") + knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"), format = 'html') %>% kableExtra::kable_styling() %>% kableExtra::save_kable(file = file.path(home_dir, paste0("/", exposure, "_exposure_info.html"))) @@ -78,7 +70,9 @@ inspectData <-function(data, home_dir, exposure, outcome, tv_confounders, ti_con cat(knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ", - sapply(strsplit(outcome, "\\."), "[",1), " Information"), format = 'pipe'), sep = "\n") + sapply(strsplit(outcome, "\\."), "[",1), " Information"), + format = 'pipe'), sep = "\n") + knitr::kable(outcome_summary, caption = 
paste0("Summary of Outcome ", sapply(strsplit(outcome, "\\."), "[",1), " Information"), format = 'html') %>% kableExtra::kable_styling() %>% @@ -92,15 +86,16 @@ inspectData <-function(data, home_dir, exposure, outcome, tv_confounders, ti_con potential_covariates <- colnames(data)[!(colnames(data) %in% c(ID))] if (sum(tv_confounders %in% potential_covariates) != length(tv_confounders)){ - stop(paste(tv_confounders[!tv_confounders %in% potential_covariates]), " time-varying confounders are not present in the dataset.") + stop(paste(tv_confounders[!tv_confounders %in% potential_covariates]), + " time-varying confounders are not present in the dataset.") } if (sum(ti_confounders %in% potential_covariates) != length(ti_confounders)){ - stop(paste(ti_confounders[!ti_confounders %in% potential_covariates]), " time invariant confounders are not present in the dataset.") + stop(paste(ti_confounders[!ti_confounders %in% potential_covariates]), + " time invariant confounders are not present in the dataset.") } all_potential_covariates <- c(time_invar_covars, time_var_covars) - # all_potential_covariates <- all_potential_covariates[!(all_potential_covariates %in% c(paste(outcome, outcome_time_pt, sep = "."), time_var_exclude))] all_potential_covariates <- all_potential_covariates[order(all_potential_covariates)] # Format for table output to visualize available covariates by time point @@ -110,18 +105,21 @@ inspectData <-function(data, home_dir, exposure, outcome, tv_confounders, ti_con dplyr::group_by(time_pt) %>% dplyr::summarize(variable = toString(variable)) - write.csv(covar_table, glue::glue("{home_dir}/balance/{exposure}-{outcome}_covariates_considered_by_time_pt.csv"), row.names = FALSE) + write.csv(covar_table, glue::glue("{home_dir}/balance/{exposure}-{outcome}_covariates_considered_by_time_pt.csv"), + row.names = FALSE) unique_vars <- length(unique(c(time_invar_covars, sapply(strsplit(all_potential_covariates, "\\."), "[", 1)))) test <- data.frame(matrix(nrow = length(time_pts), ncol = unique_vars)) colnames(test) <- unique(c(time_invar_covars, sapply(strsplit(all_potential_covariates, "\\."), "[", 1)))[order(unique(c(time_invar_covars, - sapply(strsplit(all_potential_covariates, "\\."), "[", 1))))] + sapply(strsplit(all_potential_covariates, + "\\."), "[", 1))))] rownames(test) <- time_pts for (l in 1:nrow(test)) { - test[l, c(sapply(strsplit(all_potential_covariates[grepl(paste0(".", rownames(test)[l]), all_potential_covariates)], "\\."), "[", 1), time_invar_covars)] <- 1 + test[l, c(sapply(strsplit(all_potential_covariates[grepl(paste0(".", rownames(test)[l]), + all_potential_covariates)], "\\."), "[", 1), time_invar_covars)] <- 1 } test <- test[, colnames(test)[!(colnames(test) %in% c(ID))]] @@ -129,7 +127,9 @@ inspectData <-function(data, home_dir, exposure, outcome, tv_confounders, ti_con test <- rbind(test, t(NumTimePts)) NumVars <- data.frame(NumVars = rowSums(test, na.rm = TRUE)) test[1:nrow(test), ncol(test) + 1] <- NumVars - write.csv(test, glue::glue("{home_dir}/balance/{exposure}-{outcome}_matrix_of_covariates_considered_by_time_pt.csv"), row.names = TRUE) + + write.csv(test, glue::glue("{home_dir}/balance/{exposure}-{outcome}_matrix_of_covariates_considered_by_time_pt.csv"), + row.names = TRUE) message(glue::glue("See the 'balance/' folder for a table and matrix displaying all covariates confounders considered at each exposure time point for {exposure} and {outcome}."), "\n") cat("\n") @@ -142,25 +142,17 @@ inspectData <-function(data, home_dir, exposure, outcome, 
tv_confounders, ti_con #covariate correlations covariates_to_include <- all_potential_covariates - flattenCorrMatrix <- function(cormat, pmat) { - ut <- upper.tri(cormat) - tibble( - row = rownames(cormat)[row(cormat)[ut]], - column = rownames(cormat)[col(cormat)[ut]], - cor = cormat[ut], - p = pmat[ut] - ) - } # Creates final dataset with only relevant variables covariates_to_include <- covariates_to_include[order(covariates_to_include)] variables_to_include <- unique(c(ID, outcome, covariates_to_include, time_var_covars)) data2 <- data %>% select(all_of(variables_to_include)) - data_to_impute <- data2 # Makes correlation table - corr_matrix <- cor(as.data.frame(lapply(data_to_impute[, colnames(data_to_impute) != ID], as.numeric)), use = "pairwise.complete.obs") + corr_matrix <- cor(as.data.frame(lapply(data2[, colnames(data2) != ID], + as.numeric)), use = "pairwise.complete.obs") + ggcorrplot::ggcorrplot(corr_matrix, type = "lower")+ ggplot2::theme(axis.text.x = element_text(size = 5, margin = ggplot2::margin(-2,0,0,0)), # Order: top, right, bottom, left axis.text.y = element_text(size = 5, margin = ggplot2::margin(0,-2,0,0))) + @@ -184,7 +176,6 @@ inspectData <-function(data, home_dir, exposure, outcome, tv_confounders, ti_con values = time_pts) } - - - + eval_hist(data = data2, exposure, tv_confounders, epochs, + exposure_time_pts, hi_lo_cut, ref = reference, comps = comparison) } diff --git a/R/make_love_plot.R b/R/make_love_plot.R index 34394bb7..a143224a 100644 --- a/R/make_love_plot.R +++ b/R/make_love_plot.R @@ -3,6 +3,7 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur stat_var <- colnames(balance_stats)[grepl("_bal", colnames(balance_stats))] colnames(balance_stats)[colnames(balance_stats) == stat_var] <- "avg_bal" + balance_stats <- balance_stats %>% dplyr::arrange(avg_bal) x_lab <- ifelse(exposure_type == "continuous", "Correlation with Exposure", "Standardized Mean Difference Between Exposures") @@ -12,8 +13,8 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur max_val <- ifelse(max(balance_stats[, "avg_bal"]) > 0, max(balance_stats[, "avg_bal"]) + 0.05, max(balance_thresh) + 0.05) # Make love plot per exposure time point - lp <- ggplot2::ggplot(aes(x = avg_bal, y = covariate), data = balance_stats) + - ggplot2::geom_point(aes(y = as.factor(covariate), x = avg_bal, fill = "white", alpha = 1)) + + lp <- ggplot2::ggplot(aes(x = avg_bal, y = reorder(as.factor(covariate), avg_bal)), data = balance_stats) + + ggplot2::geom_point(aes(y = reorder(as.factor(covariate), avg_bal), x = avg_bal, fill = "white", alpha = 1)) + ggplot2::geom_text(aes(label = labels, hjust = -0.2, vjust = 0.2), size = 1.5, color = "red") + ggplot2::xlab(x_lab) + ggplot2::ylab("Covariate") + @@ -35,7 +36,6 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur } - if (!is.null(imp_conf)){ #adding threshold lines lp <- lp + ggplot2::geom_vline(xintercept = balance_thresh[1], linetype = "dashed", color = "red") lp <- lp + ggplot2::geom_vline(xintercept = -balance_thresh[1], linetype = "dashed", color = "red") @@ -50,12 +50,14 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur if (data_type == "imputed"){ lp <- lp + ggplot2::ggtitle(paste0(exposure, " (t = ", exposure_time_pt, ") Balance for Imputation ", k)) - suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", folder, "plots/", form_name, "_imp_", k, "_", exposure, "_", - exposure_time_pt, "_", 
weights_method, "_summary_balance_plot.jpeg"), width = 6, height = 8)) + suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", folder, "plots/", + form_name, "_imp_", k, "_", exposure, "_", exposure_time_pt, "_", + weights_method, "_summary_balance_plot.jpeg"), width = 6, height = 8)) } else { lp <- lp + ggplot2::ggtitle(paste0(exposure, " (t = ", exposure_time_pt, ") Balance")) suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", folder, "plots/", form_name, "_", exposure, "_", - exposure_time_pt, "_", weights_method, "_summary_balance_plot.jpeg"), width = 6, height = 8)) + exposure_time_pt, "_", weights_method, "_summary_balance_plot.jpeg"), + width = 6, height = 8)) } } diff --git a/R/trimWeights.R b/R/trimWeights.R index bb63f3a9..ca42ac4a 100644 --- a/R/trimWeights.R +++ b/R/trimWeights.R @@ -2,11 +2,9 @@ trimWeights <- function(home_dir, weights, quantile = 0.95, user.o = TRUE){ - #error checking if (!dir.exists(home_dir)) { stop("Please provide a valid home directory path.") } - if (quantile > 1 || quantile < 0) { stop('Please select a quantile value between 0 and 1.') } @@ -31,7 +29,6 @@ trimWeights <- function(home_dir, weights, quantile = 0.95, user.o = TRUE){ trim_weights <- lapply(1:length(weights), function(x){ w <- weights[[x]] - t <- WeightIt::trim(w$weights, at = quantile) if (user.o == TRUE){ @@ -45,24 +42,22 @@ trimWeights <- function(home_dir, weights, quantile = 0.95, user.o = TRUE){ # Save histogram of new weights ggplot2::ggplot(as.data.frame(t), aes(x = t)) + ggplot2::geom_histogram(color = 'black', bins = 15) - ggplot2::ggsave(paste("Hist_", exposure, "-", outcome, "_", w$method, "_weights_trim_", quantile, "_imp_", x, ".png", sep = ""), - path = paste0(home_dir, "/weights/histograms/"), height = 8, width = 14) + ggplot2::ggsave(paste("Hist_", exposure, "-", outcome, "_", weights[[x]]$method, "_weights_trim_", + quantile, "_imp_", x, ".png", sep = ""), path = paste0(home_dir, "/weights/histograms/"), + height = 8, width = 14) w$weights <- NA w$weights <- t w }) - } # df if ( !is.null(names(weights)) ){ - trim_weights <- lapply(1, function(x){ w <- weights[[1]] - t <- WeightIt::trim(w$weights, at = quantile) if (user.o == TRUE){ @@ -76,7 +71,7 @@ trimWeights <- function(home_dir, weights, quantile = 0.95, user.o = TRUE){ # Save histogram of new weights ggplot2::ggplot(as.data.frame(t), aes(x = t)) + ggplot2::geom_histogram(color = 'black', bins = 15) - ggplot2::ggsave(paste("Hist_", exposure, "-", outcome,"_", w$method, "_weights_trim_", quantile, ".png", sep = ""), + ggplot2::ggsave(paste("Hist_", exposure, "-", outcome,"_",weights[[x]]$method, "_weights_trim_", quantile, ".png", sep = ""), path = paste0(home_dir, "/weights/histograms/"), height = 8, width = 14) w$weights <- NA @@ -87,7 +82,7 @@ trimWeights <- function(home_dir, weights, quantile = 0.95, user.o = TRUE){ } # Save truncated weight data - saveRDS(trim_weights, paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", w$method, "_weights_trim.rds")) + saveRDS(trim_weights, paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", weights[[1]]$method, "_weights_trim.rds")) trim_weights } diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 77989804..e19026df 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -8,14 +8,6 @@ output: html_document Recommended workflow for using the devMSMs package with longitudinal data. 
-data can be a single dataframe, mids object, or folder of imputed datasets in wide format labeled 0-k (0=or)), with the first column as an ID column for the subject identifier, missing values as NA, and continuous/factor variables appropriately classed. -All time-varying variables must be named in the format of the variable name and time point separated by a period, var.t (e.g., income.6) and should include exposure and outcome variables. -Exposure should be listed without the time point(s). Exposure time points are assumed to equal or fully encompass time time points of all other covariates. -Outcome should be listed followed by a period and the outcome time point (e.g., EF.58) -All functions will have the option to output user guidance in the console or not -Home directory will be working directory -Assumes time-varying confounders time points are equal to (or a subset of) the time points at which the exposure was measured - ### Core inputs ```{r} @@ -24,7 +16,6 @@ library(roxygen2) home_dir <- '/Users/isabella/Library/CloudStorage/Box-Box/BSL General/MSMs/testing' - exposure <- "ESETA1" exposure_time_pts <- c(6, 15, 24, 35, 58) @@ -63,15 +54,17 @@ data_orig <- data_orig %>% select(-c(contains(":"))) #example mids imputed data -# save(imputed_data, file = file.path(home_dir, "imputed_data.rds")) +# saveRDS(imputed_data, file = file.path(home_dir, "imputed_data.rds")) data_mids <- readRDS(paste0(home_dir, "/imputed_data.rds")) #example path to imputed data data_path <- "/Users/isabella/Library/CloudStorage/Box-Box/BSL General/MSMs/testing/imputations/" -data <- imputed_data[[1]] -data <- data %>% select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood")))) +data_df <- data_mids[[1]] +data_df <- data_df %>% select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood")))) +data <- data_df +data <- data_mids ``` @@ -86,19 +79,6 @@ data_long <- formatLongData(home_dir, data_orig, exposure, exposure_time_pts, ou factor_confounders = c("state","TcBlac2", "PmBlac2", "RHasSO")) -#optional function visualize & summarize confounders with long or wide data either as a single df or imputed (will use imp=0) and view exposure histories -# can take a single df (wide/long) and will uses first imputation from mids/imp input -epochs <- data.frame(epochs=c("Infancy", - "Toddlerhood", - "Childhood"), # optional subsetting of time points into more coarse epochs that will constitute exposure histories - values=I(list(c(6,15), c(24,35), c(58)))) - -hi_lo_cut <- c(0.6, 0.3) #optional list of quantiles specifying high and low cutoff values for continuous expsures; default are c(0.75, 0.25) - -inspectData(data_long, home_dir, exposure, outcome, tv_confounders, ti_confounders, epochs, hi_lo_cut) -# inspectConfounders(data_wide, home_dir, exposure, outcome, tv_confounders, ti_confounders, epochs, hi_lo_cut) -# inspectConfounders(data_mids, home_dir, exposure, outcome, tv_confounders, ti_confounders, epochs, hi_lo_cut) -# inspectConfounders(data_path, home_dir, exposure, outcome, tv_confounders, ti_confounders, epochs, hi_lo_cut) #format data from long to wide for imputation @@ -114,10 +94,12 @@ data_wide <- data_wide[,colSums(is.na(data_wide))1 -full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, bal_stats, concur_conf, keep_conf, custom, user.o = TRUE) +full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, bal_stats = NULL, type, + concur_conf, keep_conf, custom, user.o = TRUE) @@ 
-156,6 +166,7 @@ full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, ## Conduct exploratory pre-balance checking #optional balance threshold specification +rm(balance_thresh) #don't specify balance_thresh <- 0.1 balance_thresh <- c(0.05, 0.1) #optional list of balance thresholds for important & less important confounders, respectively; one entry will be applied to all confounders; default is 0.1 for all @@ -165,7 +176,8 @@ imp_conf <- NULL type <- "prebalance" formulas <- full_formulas -prebalance_stats <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights = NULL, balance_thresh, imp_conf, user.o = TRUE) +prebalance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights = NULL, + balance_thresh, imp_conf, user.o = TRUE) ``` @@ -185,14 +197,16 @@ keep_conf <- NULL #optional list of any time-varying confounders to always ret keep_conf <- "InRatioCor.6" #optional custom formulas -custom <- list(formulas = as.formula("ESETA1.6 ~ BioDadInHH2 + DrnkFreq + gov_asst + HomeOwnd + KFASTScr + mat_health + peri_health + PmAge2 + PmBlac2 + PmEd2 + PmMrSt2 + RHealth + RMomAgeU + - SmokTotl + state + SurpPreg + SWghtLB + TcBlac2")) +custom <- list(formulas = as.formula("ESETA1.6 ~ BioDadInHH2 + DrnkFreq + gov_asst + HomeOwnd + KFASTScr + mat_health + peri_health + PmAge2 + PmBlac2 + PmEd2 + PmMrSt2 + RHealth + RMomAgeU + SmokTotl + state + SurpPreg + SWghtLB + TcBlac2")) names(custom) <- c("short_form_ESETA1-EF_avg_perc.58-6") custom <- short_formulas -custom <- NULL +rm(custom) + +rm(bal_stats) type <- "short" # "full" =all tv covars; "short" =only t-1 tv covars; "update" = update short forms w/ imbalanced tv covars t>1 -short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, bal_stats, concur_conf, keep_conf, custom, user.o = TRUE) +short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, bal_stats = NULL, type, + concur_conf, keep_conf, user.o = TRUE) @@ -203,22 +217,28 @@ short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, formulas <- short_formulas method = "ps"; # optional weighting method "glm", "gbm", "bart", "super", "cbps" (default is cbps) -weights.ps <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, method, read_in_from_file = "no", user.o = TRUE) +weights.ps <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, + method, read_in_from_file = "no", user.o = TRUE) method = "cbps"; # optional weighting method "glm", "gbm", "bart", "super", "cbps" (default is cbps) -weights.cbps <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, method, read_in_from_file = "no", user.o = TRUE) +weights.cbps <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, + method, read_in_from_file = "no", user.o = TRUE) method = "glm"; # optional weighting method "glm", "gbm", "bart", "super", "cbps" (default is cbps) -weights.glm <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, method, read_in_from_file = "no", user.o = TRUE) +weights.glm <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, + method, read_in_from_file = "no", user.o = TRUE) method = "gbm"; # optional weighting method "glm", "gbm", "bart", "super", "cbps" (default is cbps) -weights.gbm <- createWeights(home_dir, data, exposure,
outcome, tv_confounders, formulas, method, read_in_from_file = "no", user.o = TRUE) +weights.gbm <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, + method, read_in_from_file = "no", user.o = TRUE) method = "bart"; # optional weighting method "glm", "gbm", "bart", "super", "cbps" (default is cbps) -weights.bart <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, method, read_in_from_file = "no", user.o = TRUE) +weights.bart <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, + method, read_in_from_file = "no", user.o = TRUE) method = "super"; # optional weighting method "glm", "gbm", "bart", "super", "cbps" (default is cbps) -weights.super <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, method, read_in_from_file = "no", user.o = TRUE) +weights.super <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, + method, read_in_from_file = "no", user.o = TRUE) @@ -236,29 +256,36 @@ type <- "weighted" formulas <- short_formulas weights <- weights.ps -balance_stats.ps <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights, balance_thresh, imp_conf, user.o = TRUE) +balance_stats.ps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, + balance_thresh, imp_conf, user.o = TRUE) weights <- weights.cbps -balance_stats.cbps <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights, balance_thresh, imp_conf, user.o = TRUE) +balance_stats.cbps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, + balance_thresh, imp_conf, user.o = TRUE) weights <- weights.glm -balance_stats.glm <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights, balance_thresh, imp_conf, user.o = TRUE) +balance_stats.glm <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, + balance_thresh, imp_conf, user.o = TRUE) weights <- weights.gbm -balance_stats.gbm <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights, balance_thresh, imp_conf, user.o = TRUE) +balance_stats.gbm <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, + balance_thresh, imp_conf, user.o = TRUE) weights <- weights.bart -balance_stats.bart <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights, balance_thresh, imp_conf, user.o = TRUE) +balance_stats.bart <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, + balance_thresh, imp_conf, user.o = TRUE) weights <- weights.super -balance_stats.super <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights, balance_thresh, imp_conf, user.o = TRUE) +balance_stats.super <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, + balance_thresh, imp_conf, user.o = TRUE) # identify best weights formulas <- short_formulas method <- "ps"; # add best method here -best_method_weights <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, method, read_in_from_file = "yes", user.o = TRUE) +best_method_weights <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, + method, read_in_from_file = "yes", 
user.o = TRUE) ``` @@ -281,14 +308,14 @@ type <- "weighted" formulas <- full_formulas weights <- best_method_weights -balance_stats <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights, balance_thresh, imp_conf, user.o = TRUE) +balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, + balance_thresh, imp_conf, user.o = TRUE) ## Update short formulas with any t->1 tv confounders that were not balanced #optional custom formulas -custom <- list(formulas = as.formula("ESETA1.6 ~ BioDadInHH2 + DrnkFreq + gov_asst + HomeOwnd + KFASTScr + mat_health + peri_health + PmAge2 + PmBlac2 + PmEd2 + PmMrSt2 + RHealth + RMomAgeU + - SmokTotl + state + SurpPreg + SWghtLB + TcBlac2")) +custom <- list(formulas = as.formula("ESETA1.6 ~ BioDadInHH2 + DrnkFreq + gov_asst + HomeOwnd + KFASTScr + mat_health + peri_health + PmAge2 + PmBlac2 + PmEd2 + PmMrSt2 + RHealth + RMomAgeU + SmokTotl + state + SurpPreg + SWghtLB + TcBlac2")) names(custom) <- c("update_form_ESETA1-EF_avg_perc.58-6") custom <- updated_formulas custom <- NULL @@ -303,14 +330,16 @@ keep_conf <- "InRatioCor.6" type <- "update" # "full" =all tv covars; "short" =only t-1 tv covars; "update" = update short forms w/ imbalanced tv covars t>1 bal_stats <- balance_stats -updated_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, bal_stats, concur_conf, keep_conf, custom, user.o = TRUE) +updated_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, bal_stats, + concur_conf, keep_conf, user.o = TRUE) # Re-specify weights with optimal method and updated formulas formulas <- updated_formulas method <- "ps"; # optional weighting method "glm", "gbm", "bart", "super", "cbps" (default is cbps) -final_weights <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, method, read_in_from_file = "no", user.o = TRUE) +final_weights <- createWeights(home_dir, data, exposure, outcome, tv_confounders, formulas, + method, read_in_from_file = "no", user.o = TRUE) @@ -318,14 +347,17 @@ final_weights <- createWeights(home_dir, data, exposure, outcome, tv_confounders weights <- final_weights quantile <- 0.95 # optional quantile of weights above which weights are trimmed (default is 0.95) -trim_weights <- trimWeights(home_dir, weights, quantile, user.o = TRUE) #this is just a wrapper function for the trim function to allow for summaries and plotting +trim_weights <- trimWeights(home_dir, weights, quantile, + user.o = TRUE) #this is just a wrapper function for the trim function to allow for summaries and plotting #sensitivity tests quantile <- 0.92 #optional quantile of weights above which weights are trimmed (default is 0.95) -trim_weights.s1 <- trimWeights(home_dir, weights, quantile, user.o = TRUE) #this is just a wrapper function for the trim function to allow for summaries and plotting +trim_weights.s1 <- trimWeights(home_dir, weights, quantile, + user.o = TRUE) #this is just a wrapper function for the trim function to allow for summaries and plotting quantile <- 0.98 #optional quantile of weights above which weights are trimmed (default is 0.95) -trim_weights.s2 <- trimWeights(home_dir, weights, quantile, user.o = TRUE) #this is just a wrapper function for the trim function to allow for summaries and plotting +trim_weights.s2 <- trimWeights(home_dir, weights, quantile, + user.o = TRUE) #this is just a wrapper function for the
@@ -347,34 +379,35 @@
 imp_conf <- NULL
 type <- "weighted"
 formulas <- full_formulas
 weights <- trim_weights
-final_balance_stats <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights, balance_thresh, imp_conf, user.o = TRUE)
+final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights,
+                                     balance_thresh, imp_conf, user.o = TRUE)
+
 #sensitivity tests
 weights <- trim_weights.s1
-final_balance_stats.s1 <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights, balance_thresh, imp_conf, user.o = TRUE)
+final_balance_stats.s1 <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights,
+                                        balance_thresh, imp_conf, user.o = TRUE)
 
 weights <- trim_weights.s2
-final_balance_stats.s2 <- assessBalance(home_dir, data, exposure, outcome, tv_confounders, type, formulas, weights, balance_thresh, imp_conf, user.o = TRUE)
+final_balance_stats.s2 <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights,
+                                        balance_thresh, imp_conf, user.o = TRUE)
 ```
 
 ### Fit marginal structural model & summarize results
-*m0:Baseline model regressing the outcome on the main effects of exposure at each exposure epoch (e.g., infancy, toddlerhood, childhood)
-*m1: Covariate model regressing the outcome on the main effects of exposure at each exposure epoch as well as any covariate confounders measured at baseline that remained imbalanced after weighting
-*m2: Interaction model regressing the outcome on the main effects of exposure at each exposure epoch as well as all interactions between exposure epochs (e.g., infancy:toddlerhood)
-*m3: Full model regressing the outcome on the main effects of exposure at each exposure epoch, all baseline covariate confounders that remained imbalanced, as well as all exposure epoch interactions (default).
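The removed m0–m3 descriptions above map onto four nested formulas. A sketch of their right-hand sides in base R, using the three epochs defined in the next hunk; `EF_avg_perc` as the outcome name is an assumption (it appears only inside the `update_form_ESETA1-EF_avg_perc.58-6` label), and `fitModel()` builds these internally:

```r
main_effects <- c("Infancy", "Toddlerhood", "Childhood")  # exposure main effect per epoch
imbalanced   <- c("ESETA1.6", "gov_asst")                 # covariates still imbalanced after weighting

f_m0 <- reformulate(main_effects, response = "EF_avg_perc")      # m0: baseline, main effects only
f_m1 <- reformulate(c(main_effects, imbalanced), "EF_avg_perc")  # m1: m0 + imbalanced baseline covariates
f_m2 <- EF_avg_perc ~ (Infancy + Toddlerhood + Childhood)^3      # m2: m0 + epoch interactions up to int_order = 3
f_m3 <- update(f_m2, . ~ . + ESETA1.6 + gov_asst)                # m3: full model (default)
```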
+
 ```{r}
 #optional family/link information for glm
-family <- gaussian #optional family reflective of outcome distribution
+family <- gaussian    #optional family reflective of outcome distribution
 link <- "identity" #optional link function
 
-#interaction order (required for interaction models m2-3)
+# max interaction order (required for interaction models m2-3)
 int_order = 3
 
 #covariates (required for covariate models m1, m3)
-covariates <- c("ESETA1.6", "gov_asst", "KFASTScr", "peri_health", "PmEd2", "RHealth") #optional list of covariates (e.g., those remaining imbalanced) for covariate models (m1, m3)
+covariates <- c("ESETA1.6", "gov_asst") #optional list of covariates (e.g., those remaining imbalanced) for covariate models (m1, m3)
 
 #optional specification of epochs
 epochs <- data.frame(epochs=c("Infancy", "Toddlerhood", "Childhood"), # optional subsetting of time points into more coarse epochs
@@ -383,15 +416,18 @@
 model <- "m3"
 weights <- trim_weights
-model <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model, family, link, int_order, covariates, epochs, user.o = TRUE)
+models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model,
+                   family, link, int_order, covariates, epochs, user.o = TRUE)
 
 #sensitivity tests
 weights <- trim_weights.s1
-model.s1 <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model, family, link, int_order, covariates, epochs, user.o = TRUE)
+models.s1 <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model,
+                      family, link, int_order, covariates, epochs, user.o = TRUE)
 
 weights <- trim_weights.s2
-model.s2 <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model, family, link, int_order, covariates, epochs, user.o = TRUE)
+models.s2 <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model,
+                      family, link, int_order, covariates, epochs, user.o = TRUE)
@@ -406,11 +442,12 @@ reference <- NA #optional reference history for custom comparisons (e.g, "l-l-l"
 reference <- "l-l-l"
 
 #optional comparison history/historis
-comparison <- NULL #optional list of histories/history for custom comparisons (e.g., c("h-h-h", "h-h-l")) (deafult is all comparisons)
-comparison <- "h-h-h"
+comparison <- NULL #optional list of histories/history for custom comparisons
+comparison <- "h-h-h" #single comparison history
+comparison <- c("h-h-h", "h-h-l") #multiple comparison histories
 
 #optional multiple comparion method
-mc_method <- "BY" # default is Benjamini-Hochburg, ("holm", "hochberg","hommel", "bonferroni", "BH", "BY", "fdr", "none" (see stats::p.adjust() documentation)
+mc_comp_method <- "BY" # default is "BH" (Benjamini-Hochberg); options: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" (see stats::p.adjust() documentation)
 
 #optional specification of dose level (high or low) for dose count
 dose_level <- "l" #(default is "h")
@@ -422,20 +459,27 @@
 exp_lab <- "Economic Strain" # (default is var name)
 out_lab <- "Executive Functioning" #(default is var name)
 
 # optional list of colors (equal to number of epochs +1) or brewer palette for plotting
- colors <- (c("Dark2")) #(see RColorBrewer::display.brewer.all() or https://r-graph-gallery.com/38-rcolorbrewers-palettes.html) for plotting default is 'Dark2'); c("blue4", "darkgreen", "darkgoldenrod", "red2")
+colors <- (c("Dark2")) #(see RColorBrewer::display.brewer.all() or https://r-graph-gallery.com/38-rcolorbrewers-palettes.html; default is "Dark2"); e.g., c("blue4", "darkgreen", "darkgoldenrod", "red2")
 
-model <- model #output from fitModel
-results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, epochs, hi_lo_cut, reference, comparison, mc_comp_method , dose_level, exp_lab, out_lab, colors)
+model <- models #output from fitModel
+results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model,
+                            epochs, hi_lo_cut, reference, comparison, mc_comp_method, dose_level, exp_lab, out_lab, colors, user.o = TRUE)
 
 #sensitivity tests
-model <- model.s1 #output from fitModel
-results.s1 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, epochs, hi_lo_cut, reference, comparison, mc_comp_method , dose_level, exp_lab, out_lab, colors)
+model <- models.s1 #output from fitModel
+results.s1 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model,
+                               epochs, hi_lo_cut, reference, comparison, mc_comp_method, dose_level, exp_lab, out_lab, colors, user.o = TRUE)
 
-model <- model.s2 #output from fitModel
-results.s2 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, epochs, hi_lo_cut, reference, comparison, mc_comp_method , dose_level, exp_lab, out_lab, colors)
+model <- models.s2 #output from fitModel
+results.s2 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model,
+                               epochs, hi_lo_cut, reference, comparison, mc_comp_method, dose_level, exp_lab, out_lab, colors, user.o = TRUE)
 ```
 
+Package citations
+```{r}
+
+```
diff --git a/man/assessBalance.Rd b/man/assessBalance.Rd
index f4eabe5a..fb76a960 100644
--- a/man/assessBalance.Rd
+++ b/man/assessBalance.Rd
@@ -2,14 +2,13 @@
 % Please edit documentation in R/assessBalance.R
 \name{assessBalance}
 \alias{assessBalance}
-\title{Assess success of covariate balancing
-Assesses how well balance was achieved for each of the covariates/potential confounds in relation to each of the exposure,
-and returns a list of unbalanced covariates for each exposure to add to future models}
+\title{Assess success of covariate balancing}
 \usage{
 assessBalance(
   home_dir,
   data,
   exposure,
+  exposure_time_pts,
   outcome,
   tv_confounders,
   type,
@@ -17,31 +16,9 @@ assessBalance(
   weights = NULL,
   balance_thresh = 0.1,
   imp_conf = NULL,
-  user.o = T
+  user.o = TRUE
 )
 }
-\arguments{
-\item{object}{msm object that contains all relevant user inputs}
-
-\item{forms}{formula for assessing balance}
-
-\item{data_for_model_with_weights}{imputed data with weights}
-
-\item{histories}{optional binary indicator of whether to print histories for bal stats}
-}
-\value{
-list of unbalanced_covariates_for_models for each exposure
-}
 \description{
 Assess success of covariate balancing
-Assesses how well balance was achieved for each of the covariates/potential confounds in relation to each of the exposure,
-and returns a list of unbalanced covariates for each exposure to add to future models
-}
-\examples{
-assessBalance(object, forms, data_for_model_with_weights, histories=1)
-}
-\seealso{
-\code{\link[=msmObject]{msmObject()}} for more on the weights_models param
-
-\code{\link[=createWeights]{createWeights()}} for more on the weights_models param
 }
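Before the calcBalStats.Rd changes below: the quantity compared against `balance_thresh` is a balance statistic per covariate, which for a binary exposure is typically a standardized mean difference between exposure groups. A generic weighted-SMD sketch for one covariate (an illustration only, not the exact computation from the Jackson paper that `calcBalStats()` implements; `x`, `a`, and `w` are hypothetical inputs):

```r
# x: covariate; a: 0/1 exposure; w: balancing weights
w_smd <- function(x, a, w) {
  m1 <- weighted.mean(x[a == 1], w[a == 1])
  m0 <- weighted.mean(x[a == 0], w[a == 0])
  s  <- sqrt((var(x[a == 1]) + var(x[a == 0])) / 2)  # one common choice: unweighted pooled SD
  (m1 - m0) / s
}
# covariates with |w_smd()| above balance_thresh (0.1 above) would count as imbalanced
```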
diff --git a/man/calcBalStats.Rd b/man/calcBalStats.Rd
index 5cd091b1..68e53acc 100644
--- a/man/calcBalStats.Rd
+++ b/man/calcBalStats.Rd
@@ -8,35 +8,15 @@ calcBalStats(
   data,
   formulas,
   exposure,
+  exposure_time_pts,
   outcome,
-  balance_thresh = 0.1,
+  balance_thresh,
   k = 0,
   weights = NULL,
   imp_conf = NULL,
   user.o
 )
 }
-\arguments{
-\item{exposure}{exposure}
-
-\item{outcome}{outcome}
-
-\item{object}{msm object that contains all relevant user inputs}
-
-\item{imp_wide_data}{wide data}
-
-\item{forms}{from createForms(), createShortForms, or updateForms}
-
-\item{f_type}{form type label}
-
-\item{weighted}{binary indicator of whether to calculate weighted balance stats}
-
-\item{histories}{binary indicator of whether to cat history sample distributions}
-}
 \description{
 Code to calculate balance stats based on Jackson paper (either weighted or unweighted)
 }
-\examples{
-calcBalStats(object, imp_wide_data, forms, f_type, exposure, outcome, k=1, weighted=0, histories=1)
-
-}
diff --git a/man/compareHistories.Rd b/man/compareHistories.Rd
index d05e91b7..583917ac 100644
--- a/man/compareHistories.Rd
+++ b/man/compareHistories.Rd
@@ -20,17 +20,14 @@ compareHistories(
   dose_level = "h",
   exp_lab = NA,
   out_lab = NA,
-  colors = "Dark2"
+  colors = "Dark2",
+  user.o = TRUE
 )
 }
\arguments{
-\item{reference}{optional reference event for custom comparison}
-
-\item{object}{msm object that contains all relevant user inputs}
-
-\item{data_for_model_with_weights_cutoff}{imputed datasets with truncated weights}
+\item{model}{fitted models for each imputed dataset}
 
-\item{all_models}{fitted models for each imputed dataset}
+\item{reference}{optional reference event for custom comparison}
 
 \item{compare}{optional comparison event(s) for custom comparison}
 }
@@ -41,6 +38,3 @@ preds_pool
 Compare exposure histories
 This code uses the best-fitting model for each exposure-outcome pair to compare the effects of user-specified reference and comparison histories of exposure on outcome using linear hypothesis testing
 }
-\examples{
-compareHistories(object, data_for_model_with_weights_cutoff, all_models, reference=NA, compare=NA)
-}
diff --git a/man/createWeights.Rd b/man/createWeights.Rd
index 9fbcb935..cc15a216 100644
--- a/man/createWeights.Rd
+++ b/man/createWeights.Rd
@@ -18,12 +18,6 @@ createWeights(
 }
 \arguments{
 \item{read_in_from_file}{optional binary indicator of whether weights should be read in from a local file}
-
-\item{object}{msm object that contains all relevant user inputs}
-
-\item{wide_long_datasets}{from formatForWeights}
-
-\item{short_forms}{from createShortForms}
 }
 \value{
 weights_models
@@ -31,9 +25,3 @@ weights_models
 \description{
 Returns a list of weights_models
 }
-\examples{
-createWeights(object, wide_long_datasets, short_forms, read_in_from_file="no")
-}
-\seealso{
-\code{\link[CBPS:CBPS]{CBPS::CBPS()}}, \code{\link[=formatForWeights]{formatForWeights()}}, \code{\link[=createShortForms]{createShortForms()}}
-}
diff --git a/man/fitModel.Rd b/man/fitModel.Rd
index d3d6bd1a..495ef0aa 100644
--- a/man/fitModel.Rd
+++ b/man/fitModel.Rd
@@ -24,12 +24,6 @@ fitModel(
 )
 }
 \arguments{
 \item{model}{user-specified model}
-
-\item{msm_object}{msm object that contains all relevant user inputs}
-
-\item{data_for_model_with_weights_cutoff}{dataset with truncated weights see truncateWeights}
-
-\item{balance_stats_final}{final bal stats conducted w/ full forms}
 }
 \value{
 fits
@@ -40,9 +34,3 @@ fits
 Fit weighted model
 This code fits a weighted marginal structural model to examine the effects of different exposure histories on outcome
 }
-\examples{
-fitModel(object, data_for_model_with_weights_cutoff, balance_stats_final, model="m3")
-}
-\seealso{
-\code{\link[=truncateWeights]{truncateWeights()}}, \code{\link[=asesssBalance]{asesssBalance()}}
-}
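fitModel()'s documented return ("fits", one weighted model per imputed dataset) follows the usual fit-then-pool pattern for multiply imputed data. A hedged sketch with hypothetical names (`imp_list` as a list of imputed data frames, each carrying a weights column `w`, and `EF_avg_perc`/epoch columns as assumed variables); the package's own fitting and pooling may differ:

```r
# Fit one weighted GLM per imputed dataset
fits <- lapply(imp_list, function(d) {
  glm(EF_avg_perc ~ Infancy + Toddlerhood + Childhood,
      family = gaussian, data = d, weights = w)  # w is resolved as a column of d
})

# Rubin's rules for a single coefficient across the m fits
ests <- vapply(fits, function(f) coef(f)[["Infancy"]], numeric(1))
vars <- vapply(fits, function(f) vcov(f)["Infancy", "Infancy"], numeric(1))
pooled_est <- mean(ests)
pooled_var <- mean(vars) + (1 + 1 / length(ests)) * var(ests)  # within- plus between-imputation variance
```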
diff --git a/man/imputeData.Rd b/man/imputeData.Rd
index 2ce5bc10..a3e330c1 100644
--- a/man/imputeData.Rd
+++ b/man/imputeData.Rd
@@ -17,8 +17,6 @@ imputeData(
 )
 }
 \arguments{
-\item{msm_object}{msm object that contains all relevant user inputs}
-
 \item{data_to_impute}{output from dataToImpute}
 }
 \value{
 imputed_datasets imputation results
 \description{
 Creates m imputed datasets from original datasets using mice::mice
 }
-\examples{
-imputeData(msm_object, data_to_impute, read_imps_from_file = "no")
-
-}
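Finally, since imputeData is documented above as creating m imputed datasets via mice::mice, the core call it wraps reduces to something like the following sketch (`m`, `method`, and `seed` here are illustrative assumptions, not the function's actual defaults):

```r
library(mice)

# Create m imputed datasets from the data to impute
imputed_datasets <- mice(data_to_impute, m = 5, method = "rf", seed = 1234)
first_complete  <- complete(imputed_datasets, action = 1)  # extract the first completed dataset
```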