From 2d0e2558a8cd78294730acbab2d3d327ba7522e3 Mon Sep 17 00:00:00 2001 From: "Isabella Stallworthy, PhD" Date: Wed, 27 Sep 2023 10:07:43 -0400 Subject: [PATCH] adding packages and incorporating noah's push --- .DS_Store | Bin 6148 -> 6148 bytes DESCRIPTION | 9 +- NAMESPACE | 32 ----- R/assessBalance.R | 277 +++++++++++++++++++------------------ R/calcBalStats.R | 178 ++++++++++++------------ R/compareHistories.R | 16 +-- R/createFormulas.R | 5 +- R/createWeights.R | 10 +- R/fitModel.R | 1 + R/make_love_plot.R | 6 +- R/trimWeights.R | 6 +- examplePipelineRevised.Rmd | 8 +- man/make_love_plot.Rd | 1 - 13 files changed, 266 insertions(+), 283 deletions(-) diff --git a/.DS_Store b/.DS_Store index 56e92df7eb62050b606b3449385cdc025432ca63..72a46ace362fbb3fe87eeb8c7771103387b8a97c 100644 GIT binary patch delta 428 zcmZuuF>As=7=3rw(x?RsibJ78e}KWiAeuo+G^8C|Pa{Q%CbXp#Iyls=OC@`ku0oNF z{aGD~Q+*dZXz0Or-#d8k-S_SmjYZ?FCXb3sU0d3hqO$hB#7C^8s3fQId0s*r4zAIH zj}AQCaPOhp38Ud4j0OPCSWTAwn=K;X93wnIV~Q9dv4=6yhB)qF;Q%yo+2a9#(>?7j69QhSgI$RvldL&XG_>D$|1xuv0&b2b`E|H cpvujH9N(EI^NTogFaQA~0|U$E2$40+0LQcuV*mgE diff --git a/DESCRIPTION b/DESCRIPTION index 51d1ff67..6334271b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,15 +22,13 @@ License: GPL-3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.2 +RoxygenNote: 7.2.3 Imports: WeightIt, marginaleffects, - tidyr, stats, ggplot2, matrixStats, - plyr, knitr, survey, jtools, @@ -38,7 +36,10 @@ Imports: stargazer, gtools, cobalt, - stringr + stringr, + huxtable, + officer, + flextable Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index b5f8964d..e5760d91 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,36 +17,4 @@ export(get_reference_values) export(make_love_plot) export(perform_multiple_comparison_correction) export(trimWeights) -importFrom(WeightIt,weightitMSM) -importFrom(dplyr,"%>%") -importFrom(dplyr,bind_rows) -importFrom(dplyr,filter) -importFrom(dplyr,mutate) -importFrom(dplyr,select) -importFrom(ggplot2,aes) -importFrom(ggplot2,element_blank) -importFrom(ggplot2,element_rect) -importFrom(ggplot2,element_text) -importFrom(ggplot2,geom_histogram) -importFrom(ggplot2,geom_point) -importFrom(ggplot2,geom_text) -importFrom(ggplot2,geom_vline) -importFrom(ggplot2,ggplot) -importFrom(ggplot2,ggsave) -importFrom(ggplot2,ggtitle) -importFrom(ggplot2,guide_axis) -importFrom(ggplot2,scale_y_discrete) -importFrom(ggplot2,theme) -importFrom(ggplot2,xlab) -importFrom(ggplot2,xlim) -importFrom(gtools,permutations) -importFrom(jtools,export_summs) -importFrom(knitr,kable) -importFrom(marginaleffects,avg_predictions) -importFrom(marginaleffects,hypotheses) -importFrom(mice,pool) -importFrom(stargazer,stargazer) -importFrom(stats,p.adjust) -importFrom(stringr,str_count) -importFrom(survey,svydesign) importFrom(survey,svyglm) diff --git a/R/assessBalance.R b/R/assessBalance.R index 2072373e..08855ec9 100644 --- a/R/assessBalance.R +++ b/R/assessBalance.R @@ -136,11 +136,11 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } - else if (!mice::is.mids(data) & !is.data.frame(data) & !inherits(data, "list")) { - stop("Please provide wide data as a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. 
= FALSE) + else if (!inherits(data, "mids") && !is.data.frame(data) && + !(is.list(data) && all(vapply(data, is.data.frame, logical(1L))))) { + stop("Please provide either a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE) } - if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } @@ -221,10 +221,13 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, } - folder <- ifelse(type == "prebalance", "prebalance/", "weighted/") + # folder <- ifelse(type == "prebalance", "prebalance/", "weighted/") + + mi <- !is.data.frame(data) + + folder <- switch(type, "prebalance" = "prebalance/", "weighted/") if (save.out){ - #creating required directories balance_dir <- file.path(home_dir, "balance") if (!dir.exists(balance_dir)) { dir.create(balance_dir) @@ -239,7 +242,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, } } - weights_method <- ifelse(type == "prebalance", "no weights", weights[[1]]$method) + weights_method <- switch(type, "prebalance" = "no weights", weights[[1]]$method) form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[",1) @@ -247,28 +250,29 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, ### Getting balance statistics if (type == "prebalance"){ - if (verbose){ - message(paste0("USER ALERT: The following statistics display covariate imbalance at each exposure time point prior to weighting, - using ", form_name, " formulas."), "\n") + if (verbose) { + message(sprintf("USER ALERT: The following statistics display covariate imbalance at each exposure time point prior to weighting, + using %s formulas.\n", + form_name)) cat("\n") } # Running balance stats function, unweighted, on each imputed dataset - if (mice::is.mids(data) | inherits(data, "list")){ + if (mi) { + + if (inherits(data, "mids")) { - if (mice::is.mids(data)){ - m = data$m + m <- data$m bal_stats <- lapply(seq_len(m), function(k) { d <- as.data.frame(mice::complete(data, k)) - if (length(which(is.na(d))) > 0){ + if (anyNA(d)){ stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).", call. = FALSE) } - exposure_type <- ifelse(inherits(d[, paste0(exposure, '.', - exposure_time_pts[1])], "numeric"), "continuous", "binary") + exposure_type <- if (is.numeric(d[, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" if (sum(duplicated(d$"ID")) > 0){ stop("Please provide wide imputed datasets with a single row per ID.", call. = FALSE) @@ -278,20 +282,18 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, balance_thresh, k = k, weights = NULL, imp_conf, verbose, save.out) }) } - - else if (inherits(data, "list")){ + else { m = length(data) bal_stats <- lapply(seq_len(m), function(k) { d <- data[[k]] - if (length(which(is.na(d))) > 0){ + if (anyNA(d)){ stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).", call. = FALSE) } - exposure_type <- ifelse(inherits(d[, paste0(exposure, '.', - exposure_time_pts[1])], "numeric"), "continuous", "binary") + exposure_type <- if (is.numeric(d[, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" if (sum(duplicated(d$"ID")) > 0){ stop("Please provide wide imputd datasets with a single row per ID.", call. 
= FALSE) @@ -304,12 +306,12 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, # Save out balance stats for each imputed dataset - bal_stats_all_imp <- do.call(bind_rows, bal_stats) + bal_stats_all_imp <- do.call(dplyr::bind_rows, bal_stats) bal_stats_all_imp <- bal_stats_all_imp[order(bal_stats_all_imp$covariate), ] if (save.out){ - write.csv(bal_stats_all_imp, paste0(home_dir, "/balance/", type, "/", - exposure, "_all_imps_balance_stat_summary.csv"), row.names = FALSE) + write.csv(bal_stats_all_imp, sprintf("%s/balance/%s/%s_all_imps_balance_stat_summary.csv", + home_dir, type, exposure)) if (verbose ){ cat("Pre balance statistics for each imputed dataset have now been saved in the 'balance/prebalance/' folder", "\n") } @@ -328,49 +330,41 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, #adds custom bal thresh info if (!is.null(imp_conf)){ - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, - # balance_thresh[1], balance_thresh[2])) + all_bal_stats$bal_thresh <- ifelse(all_bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]) - # all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - # all_bal_stats$bal_thresh, 1, 0) ) all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) } else{ - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh) all_bal_stats$bal_thresh <- balance_thresh - # - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - # all_bal_stats$bal_thresh, 1, 0) ) + all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) } - cat("\n") if (verbose){ + cat("\n") + cat(paste0("*** Averaging Across All Imputations ***"), "\n") } tot_covars <- sapply(strsplit(all_bal_stats$covariate, "\\."), `[`, 1) } + else { - else if (is.data.frame(data)){ - - if(!inherits(data, "data.frame")){ - stop("Please provide data as a data frame, mids object, or list.", call. = FALSE) - } - if (sum(duplicated(data$"ID")) > 0){ + if (sum(duplicated(data[["ID"]])) > 0){ stop("Please provide wide dataset with a single row per ID.", call. = FALSE) } - if (length(which(is.na(data))) > 0){ + + if (anyNA(data)) { stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).", call. 
= FALSE) } - exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), "continuous", "binary") + exposure_type <- if (is.numeric(data[, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" bal_stats <- calcBalStats(home_dir, data, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = 0, weights = NULL, imp_conf, verbose, save.out) @@ -393,21 +387,21 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, # Weighted else if (type == "weighted"){ - if(verbose){ - message(paste0("USER ALERT: The following statistics display covariate imbalance at each exposure time point following IPTW weighting, - using ", form_name, " formulas."), "\n") + if (verbose) { + message(sprintf("USER ALERT: The following statistics display covariate imbalance at each exposure time point following IPTW weighting, + using %s formulas.\n", form_name)) cat("\n") } - if (mice::is.mids(data) | inherits(data, "list")){ + if (mi) { # Running balance stats function, unweighted, on each imputed dataset w/ no weights - if (mice::is.mids(data)){ + if (inherits(data, "mids")) { m <- data$m bal_stats <- lapply(seq_len(m), function(k) { d <- as.data.frame(mice::complete(data, k)) - if (length(which(is.na(d))) > 0){ + if (anyNA(d)){ stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).", call. = FALSE) } @@ -421,18 +415,17 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, balance_thresh, k = k, weights = w, imp_conf, verbose, save.out) }) } + else { + m <- length(data) - - else if (inherits(data, "list")){ - m = length(data) bal_stats <- lapply(seq_len(m), function(k) { d <- data[[k]] - if (length(which(is.na(d))) > 0){ + if (anyNA(d)){ stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).", call. = FALSE) } - if (sum(duplicated(d$"ID")) > 0){ + if (sum(duplicated(d[["ID"]])) > 0){ stop("Please provide wide imputed datasets with a single row per ID.", call. 
= FALSE)
        }
@@ -445,19 +438,18 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,

     # Save out balance stats for each imputed dataset
-    bal_stats_all_imp <- do.call(bind_rows, bal_stats)
+    bal_stats_all_imp <- do.call(dplyr::bind_rows, bal_stats)

     bal_stats_all_imp <- bal_stats_all_imp[order(bal_stats_all_imp$covariate), ]

     if (save.out){
-      write.csv(bal_stats_all_imp, paste0(home_dir, "/balance/", type, "/", exposure,
-                                          "_all_imps_balance_stat_summary.csv"), row.names = FALSE)
+      write.csv(bal_stats_all_imp, sprintf("%s/balance/%s/%s_all_imps_balance_stat_summary.csv",
+                                           home_dir, type, exposure), row.names = FALSE)
       if (verbose){
         cat("Weighted balance statistics for each imputed dataset have now been saved in the 'balance/weighted/' folder", "\n")
       }
     }
-
     # Gathering imbalanced covariate statistics to average across imputed datasets for the final list/assessment of imbalanced covariates
     # Averaging across imputed datasets
     all_bal_stats <- data.frame(
@@ -469,22 +461,16 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,

     # #adds custom bal thresh info
     if (!is.null(imp_conf)){
-      # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf,
-      #                                                  balance_thresh[1], balance_thresh[2]))
+      all_bal_stats$bal_thresh <- ifelse(all_bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2])

-      # all_bal_stats <- all_bal_stats %>%dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) <
-      #                                                    all_bal_stats$bal_thresh, 1, 0) )
       all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0)
     }
     else{
-      # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh)
       all_bal_stats$bal_thresh <- balance_thresh

-      # all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) <
-      #                                                     all_bal_stats$bal_thresh, 1, 0) )
       all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0)
     }
@@ -496,9 +482,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,

     tot_covars <- sapply(strsplit(all_bal_stats$covariate, "\\."), `[`, 1)
   }
-
-
-  if (is.data.frame(data)){
+  else {

    if (sum(duplicated(data$"ID")) > 0){
      stop("Please provide wide dataset with a single row per ID.", call.
= FALSE) @@ -528,26 +512,25 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, tot_cons <- tot_covars[!duplicated(tot_covars)] # Total domains/constructs # Make love plot to summarize imbalance at each exposure time point - data_type <- ifelse(mice::is.mids(data) | inherits(data, "list"), "imputed", "single") + data_type <- if (mi) "imputed" else "single" - weights_method = ifelse(type == "weighted", weights[[1]]$method, "no weights") + weights_method <- if (type == "weighted") weights[[1]]$method else "no weights" sapply(seq_along(exposure_time_pts), function(i) { exposure_time_pt <- exposure_time_pts[i] - if (mice::is.mids(data)){ - exposure_type <- ifelse(inherits(mice::complete(data, 1)[, paste0(exposure, '.', - exposure_time_pts[1])], "numeric"), "continuous", "binary") + if (inherits(data, "mids")) { + exposure_type <- if (is.numeric(mice::complete(data, 1)[, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" + } - else if (inherits(data, "list")){ - exposure_type <- ifelse(inherits(data[[1]][, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), "continuous", "binary") + else if (!is.data.frame(data)) { + exposure_type <- if (is.numeric(data[[1]][, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" } - else if (is.data.frame(data)){ - exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), "continuous", "binary") + else { + exposure_type <- if (is.numeric(data[[paste0(exposure, '.', exposure_time_pts[1])]])) "continuous" else "binary" } - # temp <- all_bal_stats %>% filter(exp_time == exposure_time_pt) - temp <- all_bal_stats[all_bal_stats$exp_time == exposure_time_pt, ] + temp <- all_bal_stats[all_bal_stats$exp_time == exposure_time_pt, , drop = FALSE] make_love_plot(home_dir, folder, exposure, exposure_time_pt, exposure_type, k = 0, form_name, temp, data_type, balance_thresh, weights_method, imp_conf, verbose, save.out) @@ -555,22 +538,23 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (save.out){ + outfile <- sprintf("%s/balance/%s/%s-%s_all_%s_%s_associations.html", + home_dir, type, exposure, outcome, type, weights_method) + # Save out all correlations/std mean differences - sink(paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_all_", type, "_", weights_method, "_associations.html")) + sink(outfile) stargazer::stargazer(all_bal_stats, type = "html", digits = 2, column.labels = colnames(all_bal_stats), summary = FALSE, - rownames = FALSE, header = FALSE, out = paste0(home_dir, "/balance/", type, "/", exposure, "-", - outcome, "_all_", type,"_", weights_method, "_associations.html")) + rownames = FALSE, header = FALSE, out = outfile) sink() if (verbose){ - if (mice::is.mids(data) | inherits(data, "list")){ - cat(paste0("Summary plots for ", form_name, " ", exposure, - " averaged across all imputations have been saved out for each time point in the 'balance/", - type, "/plots/' folder."), "\n") + if (mi){ + cat(sprintf("Summary plots for %s %s averaged across all imputations have been saved out for each time point in the 'balance/%s/plots/' folder.\n", + form_name, exposure, type)) } - else{ - cat(paste0("Summary plots for ", form_name, " ", exposure, - " have now been saved out for each time point in the 'balance/", type, "/plots/' folder."), "\n") + else { + cat(sprintf("Summary plots for %s %s have now been saved out for each time point in the 'balance/%s/plots/' folder.\n", + form_name, exposure, 
type)) } } } @@ -578,76 +562,96 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (save.out){ # Saving out all pre-balance associations - write.csv(all_bal_stats, paste0(home_dir, "/balance/", type, "/", exposure, "_", type, - "_", weights_method, "_stat_summary.csv"), row.names = FALSE) + write.csv(all_bal_stats, sprintf("%s/balance/%s/%s_%s_%s_stat_summary.csv", + home_dir, type, exposure, type, weights_method), + row.names = FALSE) if (verbose){ - if (mice::is.mids(data) | inherits(data, "list")){ - cat(paste0("Check 'balance/", type, "/' folder for a table of all ", type, " correlations or - standardized mean differences averaged across imputed datasets."), "\n") + if (mi) { + cat(sprintf("Check 'balance/%s/' folder for a table of all %s correlations or + standardized mean differences averaged across imputed datasets.\n", + type, type)) } else { - cat(paste0("Check 'balance/", type, "/' folder for a table of all ", type, " correlations or - standardized mean differences."), "\n") + cat(sprintf("Check 'balance/%s/' folder for a table of all %s correlations or + standardized mean differences.\n", + type, type)) } } } # Finding all imbalanced variables - # unbalanced_covars <- all_bal_stats %>% - # filter(balanced == 0) - unbalanced_covars <- all_bal_stats[all_bal_stats$balanced == 0, ] + unbalanced_covars <- all_bal_stats[all_bal_stats$balanced == 0, , drop = FALSE] unbalanced_constructs <- sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1)[!duplicated(sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1))] if (verbose){ - if (mice::is.mids(data) | inherits(data, "list")){ - cat(paste0("USER ALERT: Averaging across all imputed datasets for exposure ", exposure, " using the ", - form_name, " formulas and ", weights_method, " :"), "\n") - - cat(paste0("The median absolute value relation between exposure and confounder is ", - round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ", - round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n") - - if (nrow(unbalanced_covars) > 0){ - cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ", - length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", - length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), - "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", - exposure, " of ", round(median(abs(as.numeric(unlist( - # unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% - # dplyr:: select(avg_bal) - - unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"] - )))), 2), " (range=", - round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") + if (mi) { + cat(sprintf("USER ALERT: Averaging across all imputed datasets for exposure %s using the %s formulas and %s :\n", + exposure, form_name, weights_method)) + + cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s-%s).\n", + round(median(abs(all_bal_stats$avg_bal)), 2), + round(min(all_bal_stats$avg_ba), 2), + round(max(all_bal_stats$avg_ba), 2))) + + if (nrow(unbalanced_covars) > 0) { + cat(sprintf("As shown below, the following %s covariates across time points out of %s total (%s%%) spanning %s domains out of %s (%s%%) are imbalanced with a 
remaining median absolute value correlation/std mean difference in relation to %s of %s (range=%s-%s) :\n", + nrow(unbalanced_covars), + length(tot_covars), + round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), + length(unbalanced_constructs), + length(tot_cons), + round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), + exposure, + round(median(abs(as.numeric(unbalanced_covars$avg_bal[unbalanced_covars$balanced == 0]))), 2), + round(min(unbalanced_covars$avg_bal), 2), + round(max(unbalanced_covars$avg_bal), 2))) } else{ cat("There are no imbalanced covariates.", "\n") } } else { - cat(paste0("USER ALERT: For exposure ", exposure, " using the ", form_name," formulas and ", weights_method, " :"), "\n") - - cat(paste0("The median absolute value relation between exposure and confounder is ", - round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ", - round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n") + # cat(paste0("USER ALERT: For exposure ", exposure, " using the ", form_name," formulas and ", weights_method, " :"), "\n") + cat(sprintf("USER ALERT: For exposure %s using the %s formulas and %s :\n", + exposure, form_name, weights_method)) + + # cat(paste0("The median absolute value relation between exposure and confounder is ", + # round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ", + # round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n") + cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s -%s). \n", + round(median(abs(all_bal_stats$avg_bal)), 2), + round(min(all_bal_stats$avg_ba), 2), + round(max(all_bal_stats$avg_ba), 2))) if (nrow(unbalanced_covars) > 0){ - cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ", - length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", - length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), - "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", - exposure, " of ", round(median(abs(as.numeric(unlist( - # unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% - # dplyr:: select(avg_bal) - unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"] - - )))), 2), " (range=", - round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") + # cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ", + # length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", + # length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), + # "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", + # exposure, " of ", round(median(abs(as.numeric(unlist( + # # unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% + # # dplyr:: select(avg_bal) + # unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"] + # + # )))), 2), " (range=", + # round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") + + cat(sprintf("As shown below, the following %s covariates across time points out of %s total (%s%%) 
spanning %s domains out of %s (%s%%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to %s of %s (range=%s-%s) :\n", + nrow(unbalanced_covars), + length(tot_covars), + round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), + length(unbalanced_constructs), + length(tot_cons), + round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), + exposure, + round(median(abs(as.numeric(unlist(unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"])))), 2), + round(min(unbalanced_covars$avg_bal), 2), + round(max(unbalanced_covars$avg_bal), 2))) } else{ cat("There are no imbalanced covariates.", "\n") } @@ -663,9 +667,12 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, } if (save.out){ # Save out only imbalanced covariates - sink(paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_",type,"_", weights_method, "_all_covariates_imbalanced.html")) - stargazer(unbalanced_covars, type = "html", digits = 2, column.labels = colnames(unbalanced_covars), summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_", type, "_", weights_method, "_all_covariates_imbalanced.html")) + outfile <- sprintf("%s/balance/%s/%s-%s_%s_%s_all_covariates_imbalanced.html", + home_dir, type, exposure, outcome, type, weights_method) + + sink(outfile) + stargazer::stargazer(unbalanced_covars, type = "html", digits = 2, column.labels = colnames(unbalanced_covars), summary = FALSE, rownames = FALSE, header = FALSE, + out = outfile) sink() } } diff --git a/R/calcBalStats.R b/R/calcBalStats.R index d65c0268..3b9ac797 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -102,9 +102,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ } - folder <- ifelse(weighted == 0, "prebalance/", "weighted/") + folder <- if (weighted) "weighted/" else "prebalance/" - data_type <- ifelse(k == 0, "single", "imputed") + data_type <- if (k == 0) "single" else "imputed" if (data_type == "imputed" & verbose){ cat(paste0("**Imputation ", k, "**"), "\n") @@ -151,7 +151,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # GETTING BALANCE STATS FOR T=1 W/ NO EXPOSURE HISTORY (ok to standardize immediately) if (length(lagged_time_pts) == 0) { - # temp <- data temp <- data2 # Unweighted pre-balance checking @@ -260,7 +259,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (length(lagged_time_pts) > 0) { # Merge data with prop_weights by ID - # temp <- merge(data, prop_weights, by = "ID", all.x = TRUE) temp <- merge(data2, prop_weights, by = "ID", all.x = TRUE) #data with factors split up @@ -279,20 +277,21 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ " at time point ", exposure_time_pt)) } - temp <- temp[!temp$history %in% ommitted_histories, ] + temp <- temp[!temp$history %in% ommitted_histories, , drop = FALSE] } #ends hist exc # Unweighted pre-balance checking if (weighted == 0) { # no IPTW weighting but weighting on history if (exposure_type == "continuous") { bal_stats <- sapply(sort(unique(temp$history)), function(i) { # finding balance by history - # temp2 <- temp %>% - # dplyr::filter(history == i) - temp2 <- temp[temp$history == i, ] - #should be same length as covars (already have factors split up) - cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding 
covariance - subset = temp2$history[temp2$history == i] == i) # subsetting by that history + temp2 <- temp[temp$history == i, , drop = FALSE ] + + cobalt::col_w_cov(temp2[covars], temp2[[paste0(exposure, ".", exposure_time_pt)]], std = FALSE) + + # #should be same length as covars (already have factors split up) + # cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance + # subset = temp2$history[temp2$history == i] == i) # subsetting by that history }) # getting weighted mean across histories (cols), weighting by proportion of those w/ that same history @@ -302,7 +301,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) - # bal_stats <- bal_stats %>% bal_stats$std_bal_stats <- weighted_bal_stats / (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have additional vars sd(as.numeric(data2[, rownames(bal_stats)[x]]), na.rm = TRUE) }) *# unweighted covar sd @@ -316,8 +314,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ call. = FALSE) } - # bal_stats <- bal_stats %>% - # dplyr::select(contains(c("std"))) bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) @@ -325,9 +321,8 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ else if (exposure_type == "binary") { bal_stats <- sapply(sort(unique(temp$history)), function(i) { - # temp2 <- temp %>% - # dplyr::filter(history == i) - temp2 <- temp[temp$history == i, ] + + temp2 <- temp[temp$history == i, , drop = FALSE ] cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference subset = temp2$history[temp2$history == i] == i) # subsetting by that history @@ -341,7 +336,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after finding weighted balance stats - # bal_stats <- bal_stats %>% bal_stats$std_bal_stats <- weighted_bal_stats/ sapply(seq(ncol(data[, covars])), function(x){ sqrt(mean( #dividing by pool SD estimate (unadjusted) @@ -361,9 +355,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 - - # bal_stats <- bal_stats %>% - # dplyr::select(contains(c("std"))) bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) } #ends binary @@ -375,10 +366,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (exposure_type == "continuous") { # finds balance for each covariate clustered/subset by history - bal_stats <- sapply(sort(unique(temp$history)), function(i) { - # temp2 <- temp %>% - # dplyr::filter(history == i) - temp2 <- temp[temp$history == i, ] + bal_stats <- sapply(sort(unique(temp$history), method = "radix"), function(i) { + + temp2 <- temp[temp$history == i, , drop = FALSE ] cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance subset = temp2$history[temp2$history == i] == i, # subsetting by that history @@ -417,10 +407,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ else if (exposure_type == "binary") { # 
finds balance for each covariate clustered/subset by history - bal_stats <- sapply(sort(unique(temp$history)), function(i) { - # temp2 <- temp %>% - # dplyr::filter(history == i) - temp2 <- temp[temp$history == i, ] + bal_stats <- sapply(sort(unique(temp$history, method = "radix")), function(i) { + + temp2 <- temp[temp$history == i, , drop = FALSE ] cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference subset = temp2$history[temp2$history == i] == i, # subsetting by that history @@ -441,7 +430,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after finding weighted balance stats - # bal_stats <- bal_stats %>% bal_stats$std_bal_stats <- weighted_bal_stats/ sapply(seq(ncol(data[, covars])), function(x){ sqrt(mean( #dividing by pool SD estimate (unadjusted) @@ -458,11 +446,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 - - # bal_stats <- bal_stats %>% - # dplyr::select(contains(c("std"))) bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) - } #ends binary } #ends weighted @@ -479,40 +463,31 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ #averages across factor levels to create one bal stat per factor variable? data$ID <- as.numeric(data$ID) f_vars <- colnames(data)[sapply(data, is.factor)] - f_stats <- bal_stats[rownames(bal_stats)[sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ] - f_stats$name <- sapply(strsplit(rownames(f_stats), "_"), "[", 1) - # test <- f_stats %>% dplyr::group_by(name) %>% dplyr::summarize(new = mean(std_bal_stats)) - test <- aggregate(std_bal_stats ~ name, data = f_stats, - FUN = function(x) new = mean(x)) - - colnames(test) <- c("covariate", "std_bal_stats") - new <- data.frame(std_bal_stats = test$std_bal_stats, - covariate = test$covariate) - rownames(new) <- new$covariate - bal_stats <- rbind(bal_stats[rownames(bal_stats)[!sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ], - new) + + if(length(f_vars) > 0){ + f_stats <- bal_stats[rownames(bal_stats)[sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ] + f_stats$name <- sapply(strsplit(rownames(f_stats), "_"), "[", 1) + test <- aggregate(std_bal_stats ~ name, data = f_stats, + FUN = function(x) new = mean(x)) + + colnames(test) <- c("covariate", "std_bal_stats") + new <- data.frame(std_bal_stats = test$std_bal_stats, + covariate = test$covariate) + rownames(new) <- new$covariate + bal_stats <- rbind(bal_stats[rownames(bal_stats)[!sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ], + new) + } #adds custom bal thresh info if (!is.null(imp_conf)){ - # bal_stats <- bal_stats %>% - # dplyr::mutate(bal_thresh = ifelse(bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2])) bal_stats$bal_thresh <- ifelse(bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]) - # bal_stats <- bal_stats %>% - # dplyr:: mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) ) bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) } else{ - # bal_stats <- bal_stats %>% - # dplyr::mutate(bal_thresh = balance_thresh) bal_stats$bal_thresh <- 
balance_thresh - # bal_stats <- bal_stats %>% - # dplyr:: mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) ) bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) } - # bal_stats <- bal_stats %>% - # dplyr::mutate(exposure = exposure, exp_time = exposure_time_pt) %>% - # dplyr::mutate(covar_time = sapply(strsplit(covariate, "\\."), "[", 2)) bal_stats$exposure <- exposure bal_stats$exp_time <- exposure_time_pt bal_stats$covar_time <- sapply(strsplit(bal_stats$covariate, "\\."), "[", 2) @@ -541,18 +516,12 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # Summarizing balance - # bal_summary_exp <- all_bal_stats %>% - # dplyr::group_by(exp_time) %>% - # dplyr::summarize(balanced_n = sum(balanced == 1), # Tallying balanced covariates - # imbalanced_n = sum(balanced == 0), # Tallying imbalanced covariates - # n = dplyr::n()) - - bal_summary_exp <- as.data.frame(aggregate(balanced ~ exp_time, data = all_bal_stats, - FUN = function(x) c(balanced_n = sum(x == 1), - imbalanced_n = sum(x == 0), - n = length(x)) )) - bal_summary_exp <- do.call(data.frame, bal_summary_exp) - colnames(bal_summary_exp) <- c("exp_time", "balanced_n", "imbalanced_n", "n") + bal_summary_exp <- as.data.frame(aggregate(balanced ~ exp_time, data = all_bal_stats, + FUN = function(x) c(balanced_n = sum(x == 1), + imbalanced_n = sum(x == 0), + n = length(x)) )) + bal_summary_exp <- do.call(data.frame, bal_summary_exp) + colnames(bal_summary_exp) <- c("exp_time", "balanced_n", "imbalanced_n", "n") if (save.out){ @@ -605,29 +574,66 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (verbose){ if (data_type == "imputed"){ - cat(paste0("USER ALERT: For exposure ", exposure, " imputation ", k, " using ", weights_method, " and ", form_name, " formulas: "), "\n") - cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$std_bal_stats)), 2), " (range = ", - round(min(all_bal_stats$std_bal_stats), 2), "-", round(max(all_bal_stats$std_bal_stats), 2), ")."), "\n") + # cat(paste0("USER ALERT: For exposure ", exposure, " imputation ", k, " using ", weights_method, " and ", form_name, " formulas: "), "\n") + cat(sprintf("USER ALERT: For exposure %s imputation %s using %s and %s formulas: \n", + exposure, k, weights_method, form_name)) + + # cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$std_bal_stats)), 2), " (range = ", + # round(min(all_bal_stats$std_bal_stats), 2), "-", round(max(all_bal_stats$std_bal_stats), 2), ")."), "\n") + cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s - %s).\n", + round(median(abs(all_bal_stats$std_bal_stats)), 2), + round(min(all_bal_stats$std_bal_stats), 2), + round(max(all_bal_stats$std_bal_stats), 2))) + + # cat(paste0("As shown below, ", imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, + # "%) covariates across time points, corresponding to ", + # remaining_imbalanced_domains, " out of ", total_domains, + # " domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of ", + # remaining_avg_abs_corr, " (range= ", remaining_corr_range, "):"), "\n") + + cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %sout of %s domains, + remain imbalanced with a remaining median absolute value 
correlation/std mean difference of %s (range= %s):\n", + imbalanced_covars, + total_covars, + percentage_imbalanced, + remaining_imbalanced_domains, + total_domains, + remaining_avg_abs_corr, + remaining_corr_range)) - cat(paste0("As shown below, ", imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, - "%) covariates across time points, corresponding to ", - remaining_imbalanced_domains, " out of ", total_domains, - " domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of ", - remaining_avg_abs_corr, " (range= ", remaining_corr_range, "):"), "\n") cat("\n") - cat(knitr::kable(bal_summary_exp, caption = paste0("Imbalanced Covariates for imputation ", k, " using ", - weights_method, " and ", form_name, " formulas"), format = 'pipe'), sep = "\n") + cat(knitr::kable(bal_summary_exp, caption = + # paste0("Imbalanced Covariates for imputation ", k, " using ", + # weights_method, " and ", form_name, " formulas"), + sprintf("Imbalanced Covariates for imputation %s using %s and %s formulas", + k, weights_method, form_name), + format = 'pipe'), sep = "\n") + cat("\n") cat("\n") } else { - cat(paste0("As shown below, for exposure ", exposure, " using ", weights_method, ", and ", form_name, " formulas, ", - imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, "%) covariates across time points corresponding to ", - remaining_imbalanced_domains, " out of ", total_domains, - " domains remain imbalanced with a remaining average absolute value correlation/std mean difference of ", - remaining_avg_abs_corr, " (range= ", remaining_corr_range, ") :"), "\n") + # cat(paste0("As shown below, for exposure ", exposure, " using ", weights_method, ", and ", form_name, " formulas, ", + # imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, "%) covariates across time points corresponding to ", + # remaining_imbalanced_domains, " out of ", total_domains, + # " domains remain imbalanced with a remaining average absolute value correlation/std mean difference of ", + # remaining_avg_abs_corr, " (range= ", remaining_corr_range, ") :"), "\n") + + cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %sout of %s domains, + remain imbalanced with a remaining median absolute value correlation/std mean difference of %s (range= %s):\n", + imbalanced_covars, + total_covars, + percentage_imbalanced, + remaining_imbalanced_domains, + total_domains, + remaining_avg_abs_corr, + remaining_corr_range)) + cat("\n") - cat(knitr::kable(bal_summary_exp, caption = paste0("Imbalanced covariates using ", - weights_method, " and ", form_name, " formulas"), format = 'pipe'), sep = "\n") + cat(knitr::kable(bal_summary_exp, caption = + # paste0("Imbalanced covariates using ", + # weights_method, " and ", form_name, " formulas"), + sprintf("Imbalanced covariates using %s and %s formulas", weights_method, form_name), + format = 'pipe'), sep = "\n") cat("\n") cat("\n") } diff --git a/R/compareHistories.R b/R/compareHistories.R index 6628ec27..3767d4fe 100644 --- a/R/compareHistories.R +++ b/R/compareHistories.R @@ -618,18 +618,18 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod comparisons <- comparisons[order(comparisons$dose), ] if (length(colors) > 1) { # If user input a list of colors - p <- ggplot2::ggplot(data = comparisons, aes(x = estimate, y = history, color = dose)) + + p <- ggplot2::ggplot(data = comparisons, ggplot2::aes(x = estimate, y = history, 
color = dose)) + ggplot2::geom_point(size = 5) + ggplot2::scale_color_manual(values = colors) + ggplot2::scale_y_discrete(limits = c(as.character(comparisons$history)), expand = c(0, 0.2)) + - ggplot2::geom_errorbarh(aes(xmin = low_ci, xmax = high_ci), height = 0.6) + + ggplot2::geom_errorbarh(ggplot2::aes(xmin = low_ci, xmax = high_ci), height = 0.6) + ggplot2::xlab(paste0("Predicted ", out_lab, " Value")) + ggplot2::ylab(paste0(exp_lab, " Exposure History")) + ggplot2::xlim(min(comparisons$low_ci) - 1 * sd(comparisons$low_ci), max(comparisons$high_ci) + 1 * sd(comparisons$high_ci)) + ggplot2::theme(text = ggplot2::element_text(size = 14)) + - ggplot2::theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), - panel.background = element_blank(), axis.line = ggplot2::element_line(colour = "black")) + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = element_blank(), + panel.background = ggplot2::element_blank(), axis.line = ggplot2::element_line(colour = "black")) if (save.out){ ggplot2::ggsave(paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), plot = p) @@ -639,18 +639,18 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod } } else { # If user lists a palette (default) - p <- ggplot2::ggplot(data = comparisons, aes(x = estimate, y = history, color = dose)) + + p <- ggplot2::ggplot(data = comparisons, ggplot2::aes(x = estimate, y = history, color = dose)) + ggplot2::geom_point(size = 5) + ggplot2::scale_colour_brewer(palette = colors) + ggplot2::scale_y_discrete(limits = c(as.character(comparisons$history)), expand = c(0, 0.2)) + - ggplot2::geom_errorbarh(aes(xmin = low_ci, xmax = high_ci), height = 0.6) + + ggplot2::geom_errorbarh(ggplot2::aes(xmin = low_ci, xmax = high_ci), height = 0.6) + ggplot2::xlab(paste0("Predicted ", out_lab, " Value")) + ggplot2::ylab(paste0(exp_lab, " Exposure History")) + ggplot2::xlim(min(comparisons$low_ci) - 1 * sd(comparisons$low_ci), max(comparisons$high_ci) + 1 * sd(comparisons$high_ci)) + ggplot2::theme(text = ggplot2::element_text(size = 14)) + - ggplot2::theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), - panel.background = element_blank(), axis.line = ggplot2::element_line(colour = "black")) + + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), axis.line = ggplot2::element_line(colour = "black")) + ggplot2::guides(fill = ggplot2::guide_legend(title="Dosage")) if (save.out){ diff --git a/R/createFormulas.R b/R/createFormulas.R index 23e2be1c..9e0210dc 100644 --- a/R/createFormulas.R +++ b/R/createFormulas.R @@ -247,15 +247,16 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, # as.numeric(covar_time) > 0) %>% # Finds any lagged imbalanced covars # dplyr::select(covariate) - new <- bal_stats[bal_stats$balanced ==0 & + new <- bal_stats[bal_stats$balanced == 0 & bal_stats$exp_time == time & as.numeric(bal_stats$covar_time) < exposure_time_pts[x-1] & as.numeric(bal_stats$covar_time) > 0, ] - new <- new[, "covariate"] if (nrow(new) > 0) { + new <- new[, "covariate"] + new <- as.character(unlist(new)) time_var_include <- c(time_var_include, new) diff --git a/R/createWeights.R b/R/createWeights.R index 888fd3c9..d2f549a9 100644 --- a/R/createWeights.R +++ b/R/createWeights.R @@ -172,7 +172,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = calculate_weights <- function(data, 
form, weights_method, SL.library) { if(weights_method == "super"){ - fit <- weightitMSM(form, + fit <- WeightIt::weightitMSM(form, data = data, method = weights_method, stabilize = TRUE, @@ -183,7 +183,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = over = FALSE) } else{ - fit <- weightitMSM(form, + fit <- WeightIt::weightitMSM(form, data = data, method = weights_method, stabilize = TRUE, @@ -227,7 +227,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } # Writes image of the histogram of weights to assess heavy tails - p <- ggplot2::ggplot(data = as.data.frame(fit$weight), aes(x = fit$weight)) + + p <- ggplot2::ggplot(data = as.data.frame(fit$weight), ggplot2::aes(x = fit$weight)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) @@ -283,7 +283,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } # Writes image of the histogram of weights to assess heavy tails - p <- ggplot2::ggplot(data = as.data.frame(fit$weight), aes(x = fit$weight)) + + p <- ggplot2::ggplot(data = as.data.frame(fit$weight), ggplot2::aes(x = fit$weight)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) @@ -340,7 +340,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } # Writes image of the histogram of weights to assess heavy tails - p <- ggplot2::ggplot(data = as.data.frame(weights[[1]]$weights), aes(x = weights[[1]]$weights)) + + p <- ggplot2::ggplot(data = as.data.frame(weights[[1]]$weights), ggplot2::aes(x = weights[[1]]$weights)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) diff --git a/R/fitModel.R b/R/fitModel.R index 17f50b56..6340a7b9 100644 --- a/R/fitModel.R +++ b/R/fitModel.R @@ -5,6 +5,7 @@ #' weights. 
#' @seealso {[survey::svyglm()] for more on family/link specifications, #' } +#' @importFrom survey svyglm #' @param home_dir path to home directory (required if 'save.out' = TRUE) #' @param data data in wide format as: a data frame, list of imputed data #' frames, or mids object diff --git a/R/make_love_plot.R b/R/make_love_plot.R index d83bb653..ccb0af08 100644 --- a/R/make_love_plot.R +++ b/R/make_love_plot.R @@ -105,9 +105,9 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur } # Make love plot per exposure time point - lp <- ggplot2::ggplot(aes(x = avg_bal, y = reorder(as.factor(covariate), avg_bal)), data = balance_stats) + - ggplot2::geom_point(aes(y = reorder(as.factor(covariate), avg_bal), x = avg_bal, fill = "white", alpha = 1)) + - ggplot2::geom_text(aes(label = labels, hjust = -0.2, vjust = 0.2), size = 1.5, color = "red") + + lp <- ggplot2::ggplot(ggplot2::aes(x = avg_bal, y = reorder(as.factor(covariate), avg_bal)), data = balance_stats) + + ggplot2::geom_point(ggplot2::aes(y = reorder(as.factor(covariate), avg_bal), x = avg_bal, fill = "white", alpha = 1)) + + ggplot2::geom_text(ggplot2::aes(label = labels, hjust = -0.2, vjust = 0.2), size = 1.5, color = "red") + ggplot2::xlab(x_lab) + ggplot2::ylab("Covariate") + ggplot2::xlim(min_val, max_val) + diff --git a/R/trimWeights.R b/R/trimWeights.R index d3465f35..763eaeea 100644 --- a/R/trimWeights.R +++ b/R/trimWeights.R @@ -145,7 +145,7 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v } # Save histogram of new weights - p <- ggplot2::ggplot(as.data.frame(t), aes(x = t)) + + p <- ggplot2::ggplot(as.data.frame(t), ggplot2::aes(x = t)) + ggplot2::geom_histogram(color = 'black', bins = 15) if(verbose){ @@ -181,9 +181,9 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v } # Save histogram of new weights - p <- ggplot2::ggplot(as.data.frame(t), aes(x = t)) + + p <- ggplot2::ggplot(as.data.frame(t), ggplot2::aes(x = t)) + ggplot2::geom_histogram(color = 'black', bins = 15) + - ggtitle(paste0("Weights trimmed at the ", quantile, "th value")) + ggplot2::ggtitle(paste0("Weights trimmed at the ", quantile, "th value")) if(verbose){ print(p) diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 8a48982d..3519bcd9 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -56,7 +56,7 @@ library(devMSMsHelpers) #prior to re-installing using the code above. Sorry, this is annoying! 
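+#note: because this patch bumps RoxygenNote, strips the old importFrom() directives from NAMESPACE,
+#and adds an @importFrom tag in R/fitModel.R, the NAMESPACE likely needs to be regenerated before
+#checking -- a minimal sketch, assuming devtools/roxygen2 are installed:
+# devtools::document() #rebuilds NAMESPACE and man/ from the roxygen comments; run before devtools::check()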
#conducting package checks & tests from documentation --just for IS to run; comment out when testing the workflow -# devtools::check() +devtools::check() ``` @@ -198,9 +198,9 @@ data <- mice::complete(imputed_data, 1) #just for testing purposes #IS testing multi-level factors data$"PmMrSt2" = as.factor(data$"PmMrSt2") data$HomeOwnd = as.factor(data$HomeOwnd) -# library(dplyr) -# data <- data %>% -# dplyr::select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood", "pcx")))) #removing these --came from old dataset +library(dplyr) +data <- data %>% + dplyr::select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood", "pcx")))) #removing these --came from old dataset ``` diff --git a/man/make_love_plot.Rd b/man/make_love_plot.Rd index 0e648a1c..d900c5ae 100644 --- a/man/make_love_plot.Rd +++ b/man/make_love_plot.Rd @@ -91,7 +91,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f,
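# A minimal sketch (not part of the patch) of the input check this commit standardizes on in
# assessBalance()/calcBalStats(): 'data' may be a mice 'mids' object, a single wide data frame,
# or a list of imputed data frames. The helper name 'check_data_type' is hypothetical.
check_data_type <- function(data) {
  if (inherits(data, "mids")) {
    "mids"                                   # imputed data from mice
  } else if (is.data.frame(data)) {
    "single"                                 # one wide data frame
  } else if (is.list(data) && all(vapply(data, is.data.frame, logical(1L)))) {
    "imputed_list"                           # list of imputed wide data frames
  } else {
    stop("Please provide a 'mids' object, a data frame, or a list of imputed data frames.",
         call. = FALSE)
  }
}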