Skip to content

Commit

Permalink
Merge pull request #48 from istallworthy/testing
Browse files Browse the repository at this point in the history
testing with mids
  • Loading branch information
istallworthy authored Aug 19, 2023
2 parents ed89118 + 92d0d38 commit f62ce1f
Show file tree
Hide file tree
Showing 11 changed files with 371 additions and 717 deletions.
103 changes: 8 additions & 95 deletions R/assessBalance.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty

exposure_time_pts <- as.numeric(sapply(strsplit(tv_confounders[grepl(exposure, tv_confounders)] , "\\."), "[",2))

exposure_type <- ifelse(class(data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")
# exposure_type <- ifelse(class(data[, paste0(exposure, '.', exposure_time_pts[1])]) == "numeric", "continuous", "binary")

weights_method <- ifelse(type == "prebalance", "no weights", weights[[1]]$method)

Expand Down Expand Up @@ -331,54 +331,7 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty

})

# labels <- ifelse(temp$balanced == 0, temp$covariate, "")
# min_val <- ifelse(min(temp$avg_bal) < 0, min(temp$avg_bal) - 0.02, min(balance_thresh) - 0.02)
# max_val <- ifelse(max(temp$avg_bal) > 0, max(temp$avg_bal) + 0.02, max(balance_thresh) + 0.02)


# lp <- ggplot2::ggplot(temp, aes(x = avg_bal, y = covariate)) +
# ggplot2::geom_point(aes(fill = "white", alpha = 1), na.rm = TRUE) +
# ggplot2::geom_text(aes(label = labels, hjust = -0.2, vjust = 0.2), size = 1.5, color = "red") +
# ggplot2::xlab(x_lab) +
# ggplot2::ylab("Covariate") +
# ggplot2::xlim(min_val, max_val) +
# ggplot2::ggtitle(paste0(exposure, "(t=", exposure_time_pt, ") Balance")) +
# ggplot2::theme(
# panel.background = ggplot2::element_rect(fill = "white"),
# axis.text.x = ggplot2::element_text(color = "black"),
# axis.text.y = ggplot2::element_text(color = "black"),
# axis.text = ggplot2::element_text(size = 8),
# panel.border = ggplot2::element_rect(fill = NA, color = "black"),
# plot.background = ggplot2::element_blank(),
# plot.title = ggplot2::element_text(size = 10),
# legend.background = ggplot2::element_blank(),
# legend.key = ggplot2::element_blank(),
# legend.position = "none"
# ) +
# ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
#
# if (nrow(temp) > 40) { # Stagger covariate labels if there are many to ease viewing
# lp <- lp + ggplot2::scale_y_discrete(guide = ggplot2::guide_axis(n.dodge = 2))
# }
#
# if (!is.null(imp_conf)){ #adding threshold lines
# lp <- lp + ggplot2::geom_vline(xintercept = balance_thresh[1], linetype = "dashed", color = "red")
# lp <- lp + ggplot2::geom_vline(xintercept = -balance_thresh[1], linetype = "dashed", color = "red")
# lp <- lp + ggplot2::geom_vline(xintercept = balance_thresh[2], linetype = "dashed", color = "red")
# lp <- lp + ggplot2::geom_vline(xintercept = -balance_thresh[2], linetype = "dashed", color = "red")
# } else{
# lp <- lp + ggplot2::geom_vline(xintercept = balance_thresh, linetype = "dashed", color = "red")
# lp <- lp + ggplot2::geom_vline(xintercept = -balance_thresh, linetype = "dashed", color = "red")
# }
#
# if (class(data) == "mids" | class(data) == "list"){
# suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", type, "/plots/", form_name, "_", weights_method, "_", exposure, "_all_imps_",
# exposure_time_pt, "_summary_", type, "_plot.jpeg")))
# } else{
# suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", type, "/plots/", form_name, "_", weights_method, "_", exposure, "_",
# exposure_time_pt, "_summary_", type, "_plot.jpeg")))
# }
# })


if (user.o == TRUE){
if (class(data) == "mids" | class(data) == "list"){
Expand Down Expand Up @@ -408,12 +361,6 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
}
}

# <<<<<<< Updated upstream
# # Renames factor covariates
# unbalanced_covars$covariate[sapply(strsplit(sapply(strsplit(unbalanced_covars$covariate, "_"), "[", 1), "\\."), "[", 1) %in% factor_covariates] <-
# sapply(strsplit(unbalanced_covars$covariate, "_"), "[", 1)[sapply(strsplit(sapply(strsplit(unbalanced_covars$covariate, "_"), "[", 1), "\\."), "[", 1) %in% factor_covariates]
# unbalanced_constructs <- sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1)[!duplicated(sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1))]
# =======

# Save out all correlations/std mean differences
sink(paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_all_", type, "_", weights_method, "_associations.html"))
Expand All @@ -435,56 +382,27 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
unbalanced_covars <- all_bal_stats %>%
filter(balanced == 0)

# if (class(data) == "mids" | class(data) == "list"){
# cat(paste0("USER ALERT: Averaging across all imputed datasets for exposure ", exposure, " using the ", form_name, " formulas and ", weights_method, " :"), "\n")
# cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ",
# round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n")
# cat("\n")
#
# message(paste0("USER ALERT: Averaging across all imputed datasets for exposure ", exposure, " using the ",
# form_name, ", the following ", nrow(unbalanced_covars), " covariates across time points out of ",
# length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), ",%) spanning ",
# length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2),
# "%) are imbalanced with a remaining average absolute value correlation/std mean difference in relation to ",
# exposure, " of ", round(mean(abs(unbalanced_covars$avg_bal)), 2), " (range=",
# round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n")
# }else {
# cat(paste0("USER ALERT: For exposure ", exposure, " using the ", form_name, " formulas and ", weights_method, " :"), "\n")
# cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ",
# round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n")
# cat("\n")
#
# message(paste0("USER ALERT: For exposure ", exposure, " using the ",
# form_name, ", the following ", nrow(unbalanced_covars), " covariates across time points out of ",
# length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), ",%) spanning ",
# length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), "%) are imbalanced with a remaining average absolute value correlation/std mean difference in relation to ",
# exposure, " of ", round(mean(abs(unbalanced_covars$avg_bal)), 2), " (range=",
# round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "))
# }
# # =======

# =======
# unbalanced_covars <- all_bal_stats %>%
# filter(balanced_avg == 0)
# cat("\n")
# unbalanced_constructs <- sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1)[!duplicated(sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1))]


if (class(data) == "mids" | class(data) == "list"){
cat(paste0("USER ALERT: Averaging across all imputed datasets for exposure ", exposure, " using the ", form_name, " formulas and ", weights_method, " :"), "\n")

cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ",
round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_statss$avg_ba), 2), ")."), "\n")
round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n")

cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ",
length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ",
length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2),
"%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ",
exposure, " of ", round(median(abs(as.numeric(unlist(unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>%
dplyr:: select(avg_bal))))), 2), " (range=",
round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n")

}else {
cat(paste0("USER ALERT: For exposure ", exposure, " using the ",form_name," formulas and ", weights_method, " :"), "\n")

cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ",
round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n")

cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ",
length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ",
length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2),
Expand All @@ -495,11 +413,6 @@ assessBalance <- function(home_dir, data, exposure, outcome, tv_confounders, ty
}

cat("\n")
# >>>>>>> Stashed changes
# =======
# exposure, " of ", round(median(abs(as.numeric(unlist(unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced_avg == 0) %>%
# dplyr:: select(avg_bal))))), 2), " (range=",
# round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n")

cat("\n")
cat(knitr::kable(unbalanced_covars, caption = "Imbalanced Covariates", format = 'pipe'), sep = "\n")
Expand Down
58 changes: 0 additions & 58 deletions R/calcBalStats.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,6 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1
}





#creating initial data frames
#data frame with all sampling weights for all exposures at all exposure time points for all histories
all_prop_weights <- data.frame("ID" = NA,exposure = NA, exp_time = NA, history = NA)
Expand Down Expand Up @@ -313,65 +310,10 @@ calcBalStats <- function(data, formulas, exposure, outcome, balance_thresh = 0.1

all_bal_stats <- rbind(all_bal_stats, bal_stats)
all_bal_stats$covar_time[is.na(all_bal_stats$covar_time)] <- 0
# x_lab <- ifelse(exposure_type == "continuous", "Correlation with Exposure", "Standardized Mean Difference Between Exposures")
# labels <- ifelse(bal_stats$balanced == 0, bal_stats$covariate, "")
# min_val <- ifelse(min(bal_stats$std_bal_stats) < 0, min(bal_stats$std_bal_stats) - 0.05, min(balance_thresh) - 0.05)
# max_val <- ifelse(max(bal_stats$std_bal_stats) > 0, max(bal_stats$std_bal_stats) + 0.05, max(balance_thresh) + 0.05)

# Make love plot per exposure time point
make_love_plot(home_dir, folder, exposure, exposure_time_pt, exposure_type, k, form_name, bal_stats, data_type, balance_thresh, weights_method, imp_conf)

#
# lp <- ggplot2::ggplot(aes(x = std_bal_stats, y = covariate), data = bal_stats) +
# ggplot2::geom_point(aes(y = as.factor(covariate), x = std_bal_stats, fill = "white", alpha = 1)) +
# ggplot2::geom_text(aes(label = labels, hjust = -0.2, vjust = 0.2), size = 1.5, color = "red") +
# ggplot2::xlab(x_lab) +
# ggplot2::ylab("Covariate") +
# ggplot2::xlim(min_val, max_val) +
# ggplot2::theme(panel.background = ggplot2::element_rect(fill = "white"),
# axis.text.x = ggplot2::element_text(color = "black"),
# axis.text.y = ggplot2::element_text(color = "black"),
# axis.text = ggplot2::element_text(size = 8),
# panel.border = ggplot2::element_rect(fill = NA, color = "black"),
# plot.background = ggplot2::element_blank(),
# plot.title = ggplot2::element_text(size = 10),
# legend.background = ggplot2::element_blank(),
# legend.key = ggplot2::element_blank(),
# legend.position = "none") +
# ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
#
# if (nrow(bal_stats) > 40) { # stagger covariate labels if there are many
# lp <- lp + ggplot2::scale_y_discrete(guide = ggplot2::guide_axis(n.dodge = 2))
# }
#
# if (data_type == "imputed"){
# lp <- lp + ggplot2::ggtitle(paste0(exposure, " (t=", exposure_time_pt, ") Balance for Imputation ", k))
# } else {lp <- lp + ggplot2::ggtitle(paste0(exposure, " (t=", exposure_time_pt, ") Balance"))
# }
#
# if (!is.null(imp_conf)){ #adding threshold lines
# lp <- lp + ggplot2::geom_vline(xintercept = balance_thresh[1], linetype = "dashed", color = "red")
# lp <- lp + ggplot2::geom_vline(xintercept = -balance_thresh[1], linetype = "dashed", color = "red")
# lp <- lp + ggplot2::geom_vline(xintercept = balance_thresh[2], linetype = "dashed", color = "red")
# lp <- lp + ggplot2::geom_vline(xintercept = -balance_thresh[2], linetype = "dashed", color = "red")
# } else{
# lp <- lp + ggplot2::geom_vline(xintercept = balance_thresh, linetype = "dashed", color = "red")
# lp <- lp + ggplot2::geom_vline(xintercept = -balance_thresh, linetype = "dashed", color = "red")
#
# }
#
# if (data_type == "imputed"){
# lp <- lp + ggplot2::ggtitle(paste0(exposure, " (t = ", exposure_time_pt, ") Balance for Imputation ", k))
#
# suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", folder, "/plots/", form_name, "_imp_", k, "_", exposure, "_",
# exposure_time_pt, "_", weights_method, "_summary_balance_plot.jpeg"), width = 6, height = 8))
# } else {lp <- lp + ggplot2::ggtitle(paste0(exposure, " (t = ", exposure_time_pt, ") Balance"))
#
# suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", folder, "/plots/", form_name, "_", exposure, "_",
# exposure_time_pt, "_", weights_method, "_summary_balance_plot.jpeg"), width = 6, height = 8))
# }


} # Ends exp_time_pt loop


Expand Down
Loading

0 comments on commit f62ce1f

Please sign in to comment.