Skip to content

Commit

Permalink
Merge pull request #93 from ngreifer/main
Browse files Browse the repository at this point in the history
Fixes, cleaning; see my email to you
  • Loading branch information
istallworthy authored Sep 28, 2023
2 parents 3073a44 + eadb9e7 commit 963ce59
Show file tree
Hide file tree
Showing 10 changed files with 140 additions and 585 deletions.
1 change: 0 additions & 1 deletion .Rapp.history

This file was deleted.

3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
^docs$
^pkgdown$
^\.github$
^examplePipelineRevisited\.Rmd$
^\.git$
^\.DS_Store$
512 changes: 0 additions & 512 deletions .Rhistory 2

This file was deleted.

82 changes: 49 additions & 33 deletions R/assessBalance.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,13 @@




assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights = NULL, balance_thresh = 0.1,
imp_conf = NULL, verbose = TRUE, save.out = TRUE){

if (!is.logical(save.out)) {
stop("`save.out` must be a flag (TRUE or FALSE)", call. = FALSE)
}
if (save.out) {
if (missing(home_dir)) {
stop("Please supply a home directory.", call. = FALSE)
Expand All @@ -132,10 +136,11 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
}
}

if (missing(data)){
if (missing(data)) {
stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.",
call. = FALSE)
}

else if (!inherits(data, "mids") && !is.data.frame(data) &&
!(is.list(data) && all(vapply(data, is.data.frame, logical(1L))))) {
stop("Please provide either a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE)
Expand Down Expand Up @@ -183,22 +188,28 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
}
}


if (missing(type)){
stop("Please supply a 'weighted', 'prebalance' type", call. = FALSE)
}
if (!inherits(type, "character") || length(type) != 1 ){
stop("Please provide a single type as a character string from the following list: 'prebalance', 'weighted'", call. = FALSE)
}
else if(!type %in% c("prebalance", "weighted")){
else if (!type %in% c("prebalance", "weighted")) {
stop("Please provide a type from the following list: 'prebalance', 'weighted'", call. = FALSE)
}
else if (type == "prebalance" && !is.null(weights)){
else if (type == "prebalance" && !is.null(weights)) {
stop("The 'prebalance' mode of this function assesses balance prior to weighting and thus does not take weights.", call. = FALSE)
}
else if (type == "weighted" && (is.null(weights) || missing(weights))){
else if (type == "weighted" && (is.null(weights) || missing(weights))) {
stop("The 'weighted' mode of this function requires weights be supplied in the form of output from createWeights.", call. = FALSE)
}

if (!inherits(data, "mids") && !is.data.frame(data) &&
!(is.list(data) && all(vapply(data, is.data.frame, logical(1L))))) {
stop("Please provide either a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE)
}

if (!is.null(weights) && (!is.list(weights) || is.data.frame(weights))){
stop("Please supply a list of weights output from the createWeights function.", call. = FALSE)
}
Expand All @@ -209,9 +220,10 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
}
}

if(!is.numeric(balance_thresh)){
if (!is.numeric(balance_thresh)) {
stop("Please provide one or two balance thresholds as numbers from 0-1.")
}

if (length(balance_thresh) == 2 && is.null(imp_conf)){
stop("If you wish to provide different balance threshold for important and less important confounders, please provide a list of important confounders in the 'imp_conf' field.", call. = FALSE)
}
Expand Down Expand Up @@ -241,6 +253,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,

folder <- switch(type, "prebalance" = "prebalance/", "weighted/")


if (save.out){
balance_dir <- file.path(home_dir, "balance")
if (!dir.exists(balance_dir)) {
Expand All @@ -258,11 +271,11 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,

weights_method <- switch(type, "prebalance" = "no weights", weights[[1]]$method)

form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[",1)
form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[", 1)


### Getting balance statistics
if (type == "prebalance"){
if (type == "prebalance") {

if (verbose) {
message(sprintf("USER ALERT: The following statistics display covariate imbalance at each exposure time point prior to weighting,
Expand All @@ -275,7 +288,6 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
if (mi) {

if (inherits(data, "mids")) {

m <- data$m

bal_stats <- lapply(seq_len(m), function(k) {
Expand All @@ -297,6 +309,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
})
}
else {

m = length(data)

bal_stats <- lapply(seq_len(m), function(k) {
Expand All @@ -309,20 +322,20 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,

exposure_type <- if (is.numeric(d[, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary"

if (sum(duplicated(d$"ID")) > 0){
stop("Please provide wide imputd datasets with a single row per ID.", call. = FALSE)
if (sum(duplicated(d[["ID"]])) > 0){
stop("Please provide wide imputed datasets with a single row per ID.", call. = FALSE)
}

calcBalStats(home_dir, d, formulas, exposure, exposure_time_pts, outcome,
balance_thresh, k = k, weights = NULL, imp_conf, verbose, save.out)
})
}


# Save out balance stats for each imputed dataset
bal_stats_all_imp <- do.call(dplyr::bind_rows, bal_stats)
bal_stats_all_imp <- bal_stats_all_imp[order(bal_stats_all_imp$covariate), ]


if (save.out){
write.csv(bal_stats_all_imp, sprintf("%s/balance/%s/%s_all_imps_balance_stat_summary.csv",
home_dir, type, exposure))
Expand All @@ -343,6 +356,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
avg_bal = rowMeans(do.call(cbind, lapply(bal_stats, `[[`, "std_bal_stats"))))

#adds custom bal thresh info

if (!is.null(imp_conf)){

all_bal_stats$bal_thresh <- ifelse(all_bal_stats$covariate %in% imp_conf,
Expand All @@ -361,14 +375,14 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,

if (verbose){
cat("\n")

cat(paste0("*** Averaging Across All Imputations ***"), "\n")
}

tot_covars <- sapply(strsplit(all_bal_stats$covariate, "\\."), `[`, 1)
}
else {


if (sum(duplicated(data[["ID"]])) > 0){
stop("Please provide wide dataset with a single row per ID.", call. = FALSE)
}
Expand Down Expand Up @@ -399,7 +413,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,


# Weighted
else if (type == "weighted"){
else if (type == "weighted") {

if (verbose) {
message(sprintf("USER ALERT: The following statistics display covariate imbalance at each exposure time point following IPTW weighting,
Expand All @@ -419,7 +433,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).",
call. = FALSE)
}
if (sum(duplicated(d$"ID")) > 0){
if (sum(duplicated(d[["ID"]])) > 0){
stop("Please provide wide imputed datasets with a single row per ID.", call. = FALSE)
}

Expand Down Expand Up @@ -463,7 +477,6 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
}
}


# Gathering imbalanced covariate statistics to average across imputed datasets for the final list/assessment of imbalanced covariates
# Averaging across imputed datasets
all_bal_stats <- data.frame(
Expand All @@ -489,8 +502,8 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
all_bal_stats$bal_thresh, 1, 0)
}

cat("\n")
if (verbose){
if (verbose) {
cat("\n")
cat(paste0("*** Averaging Across All Imputations ***"), "\n")
}

Expand Down Expand Up @@ -521,9 +534,8 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
}
} #ends weighted


### Plotting and summarizing
tot_cons <- tot_covars[!duplicated(tot_covars)] # Total domains/constructs
tot_cons <- unique(tot_covars) # Total domains/constructs

# Make love plot to summarize imbalance at each exposure time point
data_type <- if (mi) "imputed" else "single"
Expand All @@ -544,13 +556,13 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
exposure_type <- if (is.numeric(data[[paste0(exposure, '.', exposure_time_pts[1])]])) "continuous" else "binary"
}


temp <- all_bal_stats[all_bal_stats$exp_time == exposure_time_pt, , drop = FALSE]

make_love_plot(home_dir, folder, exposure, exposure_time_pt, exposure_type, k = 0, form_name, temp,
data_type, balance_thresh, weights_method, imp_conf, verbose, save.out)
})


if (save.out){
outfile <- sprintf("%s/balance/%s/%s-%s_all_%s_%s_associations.html",
home_dir, type, exposure, outcome, type, weights_method)
Expand All @@ -561,6 +573,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
rownames = FALSE, header = FALSE, out = outfile)
sink()


if (verbose){
if (mi){
cat(sprintf("Summary plots for %s %s averaged across all imputations have been saved out for each time point in the 'balance/%s/plots/' folder.\n",
Expand All @@ -573,13 +586,13 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
}
}


if (save.out){
if (save.out) {
# Saving out all pre-balance associations
write.csv(all_bal_stats, sprintf("%s/balance/%s/%s_%s_%s_stat_summary.csv",
home_dir, type, exposure, type, weights_method),
row.names = FALSE)


if (verbose){
if (mi) {
cat(sprintf("Check 'balance/%s/' folder for a table of all %s correlations or
Expand All @@ -596,6 +609,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,


# Finding all imbalanced variables

unbalanced_covars <- all_bal_stats[all_bal_stats$balanced == 0, , drop = FALSE]

unbalanced_constructs <- sapply(strsplit(unbalanced_covars$covariate, "\\."),
Expand Down Expand Up @@ -625,9 +639,9 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
round(min(unbalanced_covars$avg_bal), 2),
round(max(unbalanced_covars$avg_bal), 2)))
}
else{ cat("There are no imbalanced covariates.", "\n")
else{
cat("There are no imbalanced covariates.", "\n")
}

}
else {
# cat(paste0("USER ALERT: For exposure ", exposure, " using the ", form_name," formulas and ", weights_method, " :"), "\n")
Expand Down Expand Up @@ -667,19 +681,21 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
round(min(unbalanced_covars$avg_bal), 2),
round(max(unbalanced_covars$avg_bal), 2)))
}
else{ cat("There are no imbalanced covariates.", "\n")
else{
cat("There are no imbalanced covariates.", "\n")
}
}
}

if (nrow(unbalanced_covars) > 0){
if(verbose){
cat("\n")
cat("\n")
cat(knitr::kable(unbalanced_covars, caption = "Imbalanced Covariates", format = 'pipe'), sep = "\n")
if (nrow(unbalanced_covars) > 0) {
if (verbose)
{
cat("\n\n")
cat(knitr::kable(unbalanced_covars, caption = "Imbalanced Covariates", format = "pipe"), sep = "\n")
cat("\n")
}
if (save.out){

if (save.out) {
# Save out only imbalanced covariates
outfile <- sprintf("%s/balance/%s/%s-%s_%s_%s_all_covariates_imbalanced.html",
home_dir, type, exposure, outcome, type, weights_method)
Expand All @@ -690,8 +706,8 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
sink()
}
}
else{
if(verbose){
else {
if (verbose) {
cat("There are no imbalanced covariates.")
}
}
Expand Down
Loading

0 comments on commit 963ce59

Please sign in to comment.