Skip to content

Commit

Permalink
Merge pull request #100 from istallworthy/feedback
Browse files Browse the repository at this point in the history
noah feedback
  • Loading branch information
istallworthy authored Sep 27, 2023
2 parents 5f12b7f + edbd27b commit 98a10b1
Show file tree
Hide file tree
Showing 14 changed files with 274 additions and 211 deletions.
17 changes: 7 additions & 10 deletions R/assessBalance.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,34 +175,34 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
if (missing(type)){
stop("Please supply a 'weighted', 'prebalance' type", call. = FALSE)
}
if (!inherits(type, "character") | length(type) != 1 ){
if (!inherits(type, "character") || length(type) != 1 ){
stop("Please provide a single type as a character string from the following list: 'prebalance', 'weighted'", call. = FALSE)
}
else if(!type %in% c("prebalance", "weighted")){
stop("Please provide a type from the following list: 'prebalance', 'weighted'", call. = FALSE)
}
else if (type == "prebalance" & !is.null(weights)){
else if (type == "prebalance" && !is.null(weights)){
stop("The 'prebalance' mode of this function assesses balance prior to weighting and thus does not take weights.", call. = FALSE)
}
else if (type == "weighted" & (is.null(weights) | missing(weights))){
else if (type == "weighted" && (is.null(weights) || missing(weights))){
stop("The 'weighted' mode of this function requires weights be supplied in the form of output from createWeights.", call. = FALSE)
}

if (!is.null(weights) & ! inherits(weights, "list")){
if (!is.null(weights) && !inherits(weights, "list")){
stop("Please supply a list of weights output from the createWeights function.", call. = FALSE)
}

if(!is.numeric(balance_thresh)){
stop("Please provide one or two balance thresholds as numbers from 0-1.")
}
if (length(balance_thresh) == 2 & is.null(imp_conf)){
if (length(balance_thresh) == 2 && is.null(imp_conf)){
stop("If you wish to provide different balance threshold for important and less important confounders, please provide a list of important confounders in the 'imp_conf' field.", call. = FALSE)
}

if (!is.null(imp_conf) & length(balance_thresh) == 1){
if (!is.null(imp_conf) && length(balance_thresh) == 1){
stop("If you provide a list of important confounders, please provide a list of two balance thresholds for important and less important confounders, respectively", call. = FALSE)
}
else if(!is.null(imp_conf) & !is.character(imp_conf)){
else if(!is.null(imp_conf) && !is.character(imp_conf)){
stop("Please provide a list variable names as characters that are important confounders.", call. = FALSE)
}

Expand All @@ -220,9 +220,6 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE)
}


# folder <- ifelse(type == "prebalance", "prebalance/", "weighted/")

mi <- !is.data.frame(data)

folder <- switch(type, "prebalance" = "prebalance/", "weighted/")
Expand Down
71 changes: 21 additions & 50 deletions R/calcBalStats.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
if(!inherits(formulas, "list")){
stop("Please provide a list of formulas for each exposure time point", call. = FALSE)
}
if (!is.null(weights) & !inherits(weights, "weightitMSM")){
if (!is.null(weights) && !inherits(weights, "weightitMSM")){
stop("Please supply a list of weights output from the createWeights function (via WeightIt::WeightItMSM).", call. = FALSE)
}

form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[", 1)
exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), "continuous", "binary")
weighted = ifelse(!is.null(weights), 1, 0)
exposure_type <- if(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric")) "continuous" else "binary"
weighted <- if(!is.null(weights)) 1 else 0

factor_covariates <- colnames(data)[which(sapply(data, class) == "factor")]
factor_covariates <- factor_covariates[!factor_covariates %in% "ID"]
Expand All @@ -106,7 +106,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_

data_type <- if (k == 0) "single" else "imputed"

if (data_type == "imputed" & verbose){
if (data_type == "imputed" && verbose){
cat(paste0("**Imputation ", k, "**"), "\n")
}

Expand Down Expand Up @@ -218,21 +218,21 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
if (exp == 0) { # low levels/absent
if (exposure_type == "continuous") {
data$flag <- ifelse(data[, exps_time[t]] <= median(data[, paste0(exposure, ".", exposure_time_pt)])
& data$flag == flag, t, NA) # finding those w/ vals <= median exp @ time pt & flagged at prev t's
&& data$flag == flag, t, NA) # finding those w/ vals <= median exp @ time pt & flagged at prev t's
}
else { # for binary exp
data$flag <- ifelse(data[, exps_time[t]] == 0 & data$flag == flag, t, NA) # if exposure is absent & flagged at prev t's
data$flag <- ifelse(data[, exps_time[t]] == 0 && data$flag == flag, t , NA) # if exposure is absent & flagged at prev t's
}

}

if (exp == 1) { # hi levels/present
if (exposure_type == "continuous") {
data$flag <- ifelse(data[, exps_time[t]] > median(data[, paste0(exposure, ".", exposure_time_pt)])
& data$flag == flag, t, NA) # finding those w/ vals > median exp @ time pt & flagged at prev t's
&& data$flag == flag, t, NA) # finding those w/ vals > median exp @ time pt & flagged at prev t's
}
else { # binary exp
data$flag <- ifelse(data[, exps_time[t]] == 1 & data$flag == flag, t, NA) # if exposure is present & flagged at prev t's
data$flag <- ifelse(data[, exps_time[t]] == 1 && data$flag == flag, t , NA) # if exposure is present & flagged at prev t's
}

}
Expand Down Expand Up @@ -486,7 +486,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
}
else{
bal_stats$bal_thresh <- balance_thresh
bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0)
bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1 , 0)
}

bal_stats$exposure <- exposure
Expand All @@ -505,19 +505,14 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_

if (verbose & save.out) {
if (data_type == "imputed"){
# cat(paste0("For each time point and imputation, ", gsub("/", "", folder), " summary plots for ",
# form_name, " formulas weighting method ",
# weights_method, " have now been saved in the '", folder, "plots/' folder."), "\n")

cat(paste0("For each time point and imputation, %s summary plots for %s
formulas weighting method %s have now been saved in the %s plots/' folder.\n",
gsub("/", "", folder), form_name, weights_method, folder))

}
else {
# cat(paste0(" For each time point, ", gsub("/", "", folder), " summary plots for ",
# form_name, " formulas and weighting method ",
# weights_method, " have now been saved in the '", folder, "plots/' folder."), "\n")

cat(paste0("For each time point, %s summary plots for %s
formulas weighting method %s have now been saved in the %s plots/' folder.\n",
gsub("/", "", folder), form_name, weights_method, folder))
Expand All @@ -536,14 +531,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_

if (save.out){
write.csv(bal_summary_exp,
# paste0(home_dir, "/balance/", folder, form_name, "_", exposure, "_", k, "_",
# weights_method, "_balance_stat_summary.csv"))
sprintf("%s/balance/%s%s_%s_%s_%s_balance_stat_summary.csv",
home_dir,folder, form_name, exposure, k, weights_method))

write.csv(all_prop_weights,
# paste0(home_dir, "/balance/", folder, "/", form_name, "_form_", exposure, "_", k,
# "_", weights_method, "_history_sample_weight.csv"))
sprintf("%s/balance/%s/%s_form_%s_%s_%s_history_sample_weight.csv",
home_dir, folder, form_name, exposure, k, weights_method))

Expand All @@ -555,20 +546,15 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
%s have been saved in the 'balance/%s' folder. \n",
form_name, exposure, k, weights_method, folder))

# cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure, ", imputation ", k,
# " have been saved in the 'balance/", folder, "' folder"), "\n")
cat(sprintf("Sampling weights using the %s for %s imputation %s have been saved in the 'balance/%s' folder., \n",
form_name, exposure, k, folder))
}
else{
# cat(paste0("Balance statistics using ", form_name, " formulas for ", exposure, "using ",
# weights_method, " have been saved in the 'balance/", folder, "' folder"), "\n")

cat(sprintf("Balance statistics using %s formulas for %s using
%s have been saved in the 'balance/%s' folder. \n",
form_name, exposure, weights_method, folder))

# cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure,
# " have been saved in the 'balance/", folder, "' folder"), "\n")
cat(sprintf("Sampling weights using the %s for %s have been saved in the 'balance/%s' folder., \n",
form_name, exposure, folder))
}
Expand All @@ -579,9 +565,11 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
# tallies total possible COVARIATES FROM FORM FOR ASSESSING BALANCE
all_form <- as.data.frame(do.call(rbind, formulas))
tot_covars <- deparse(all_form[, 3], width.cutoff = 300)
tot_covars <- as.character(unlist(strsplit(tot_covars, "\\+")))[!grepl("form", as.character(unlist(strsplit(tot_covars, "\\+"))))]
tot_covars <- as.character(unlist(strsplit(tot_covars, "\\+")))[
!grepl("form", as.character(unlist(strsplit(tot_covars, "\\+"))))]
tot_covars <- gsub(" ", "", tot_covars)
tot_covars <- na.omit(sapply(strsplit(tot_covars, "\\."), "[", 1)[!duplicated(sapply(strsplit(tot_covars, "\\."), "[", 1))])
tot_covars <- na.omit(sapply(strsplit(tot_covars, "\\."), "[", 1)[
!duplicated(sapply(strsplit(tot_covars, "\\."), "[", 1))])

imbalanced_covars <- sum(bal_summary_exp$imbalanced_n, na.rm = TRUE)
total_covars <- sum(bal_summary_exp$n, na.rm = TRUE)
Expand All @@ -601,23 +589,14 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
if (verbose){
if (data_type == "imputed"){

# cat(paste0("USER ALERT: For exposure ", exposure, " imputation ", k, " using ", weights_method, " and ", form_name, " formulas: "), "\n")
cat(sprintf("USER ALERT: For exposure %s imputation %s using %s and %s formulas: \n",
exposure, k, weights_method, form_name))

# cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$std_bal_stats)), 2), " (range = ",
# round(min(all_bal_stats$std_bal_stats), 2), "-", round(max(all_bal_stats$std_bal_stats), 2), ")."), "\n")
cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s - %s).\n",
round(median(abs(all_bal_stats$std_bal_stats)), 2),
round(min(all_bal_stats$std_bal_stats), 2),
round(max(all_bal_stats$std_bal_stats), 2)))

# cat(paste0("As shown below, ", imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced,
# "%) covariates across time points, corresponding to ",
# remaining_imbalanced_domains, " out of ", total_domains,
# " domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of ",
# remaining_avg_abs_corr, " (range= ", remaining_corr_range, "):"), "\n")

cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %sout of %s domains,
remain imbalanced with a remaining median absolute value correlation/std mean difference of %s (range= %s):\n",
imbalanced_covars,
Expand All @@ -629,21 +608,14 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
remaining_corr_range))

cat("\n")
cat(knitr::kable(bal_summary_exp, caption =
# paste0("Imbalanced Covariates for imputation ", k, " using ",
# weights_method, " and ", form_name, " formulas"),
sprintf("Imbalanced Covariates for imputation %s using %s and %s formulas",
k, weights_method, form_name),
cat(knitr::kable(bal_summary_exp,
caption = sprintf("Imbalanced Covariates for imputation %s using %s and %s formulas",
k, weights_method, form_name),
format = 'pipe'), sep = "\n")

cat("\n")
cat("\n")
} else {
# cat(paste0("As shown below, for exposure ", exposure, " using ", weights_method, ", and ", form_name, " formulas, ",
# imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, "%) covariates across time points corresponding to ",
# remaining_imbalanced_domains, " out of ", total_domains,
# " domains remain imbalanced with a remaining average absolute value correlation/std mean difference of ",
# remaining_avg_abs_corr, " (range= ", remaining_corr_range, ") :"), "\n")

cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %sout of %s domains,
remain imbalanced with a remaining median absolute value correlation/std mean difference of %s (range= %s):\n",
Expand All @@ -656,10 +628,8 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
remaining_corr_range))

cat("\n")
cat(knitr::kable(bal_summary_exp, caption =
# paste0("Imbalanced covariates using ",
# weights_method, " and ", form_name, " formulas"),
sprintf("Imbalanced covariates using %s and %s formulas", weights_method, form_name),
cat(knitr::kable(bal_summary_exp,
caption = sprintf("Imbalanced covariates using %s and %s formulas", weights_method, form_name),
format = 'pipe'), sep = "\n")
cat("\n")
cat("\n")
Expand All @@ -670,3 +640,4 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_

all_bal_stats
}
f
2 changes: 1 addition & 1 deletion R/compareHelpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ create_custom_comparisons <- function(preds, ref_vals, comp_vals, exposure) {

add_histories <- function(p, d) {

if((is.list(p)) & length(p) == 1){
if((is.list(p)) && length(p) == 1){
history <- matrix(data = NA, nrow = nrow(p[[1]]), ncol = 1) # Get histories from the first element
p <- p[[1]]
}
Expand Down
Loading

0 comments on commit 98a10b1

Please sign in to comment.