From fbbb6b6b27e96176b0b3defc55db331554c4cc76 Mon Sep 17 00:00:00 2001 From: Isa Stallworthy <31548151+istallworthy@users.noreply.github.com> Date: Thu, 21 Sep 2023 18:00:37 -0400 Subject: [PATCH 01/12] adding multi-level factor capacity --- R/calcBalStats.R | 59 ++++++++++++++++++++++++++++---------- examplePipelineRevised.Rmd | 4 ++- 2 files changed, 47 insertions(+), 16 deletions(-) diff --git a/R/calcBalStats.R b/R/calcBalStats.R index 6f22663c..e2f92740 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -126,6 +126,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ cat(paste0("**Imputation ", k, "**"), "\n") } + #split factors + data$"ID" = as.numeric(data$"ID") + data2 <- cobalt::splitfactor(data, names(data)[sapply(data, class ) == "factor"], drop.first = FALSE ) #creating initial data frames #data frame with all sampling weights for all exposures at all exposure time points for all histories @@ -147,10 +150,16 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ covars <- as.character(unlist(strsplit(covars, "\\+"))) covars <- gsub(" ", "", covars) + #making factor covars separate variables + data_cov <- data[, covars] + data_cov <- cobalt::splitfactor(data_cov, names(data_cov)[sapply(data_cov, class ) == "factor"], drop.first = FALSE ) + covars <- colnames(data_cov) + # GETTING BALANCE STATS FOR T=1 W/ NO EXPOSURE HISTORY (ok to standardize immediately) if (length(lagged_time_pts) == 0) { - temp <- data + # temp <- data + temp <- data2 # Unweighted pre-balance checking if (!weighted) { @@ -253,8 +262,11 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # GET BALANCE STATISTICS FOR T>1 (when there is a history to weight on) if (length(lagged_time_pts) > 0) { + # Merge data with prop_weights by ID - temp <- merge(data, prop_weights, by = "ID", all.x = TRUE) + # temp <- merge(data, prop_weights, by = "ID", all.x = TRUE) + temp <- merge(data2, prop_weights, by = "ID", all.x = TRUE) #data with factors split up + # Removing any histories that only have 1 or 0 person contributing (cannot calc bal stats) if (sum(prop_sum$n == 1) > 0 | sum(prop_sum$n == 0) > 0) { @@ -281,6 +293,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ temp2 <- temp %>% dplyr::filter(history == i) + #should be same length as covars (already have factors split up) cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance subset = temp2$history[temp2$history == i] == i) # subsetting by that history }) @@ -293,18 +306,20 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after weighting by history - bal_stats <- bal_stats %>% - dplyr::mutate(std_bal_stats = weighted_bal_stats / - (sapply(seq(ncol(data[, covars])), function(x) { #issue: looking in data for unweighted vals but factors have additional vars - sd(as.numeric(data[, covars][, x]), na.rm = TRUE) }) *# unweighted covar sd - sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) # exposure SD at that time pt - # bal_stats <- bal_stats %>% # dplyr::mutate(std_bal_stats = weighted_bal_stats / - # (sapply(seq(nrow(bal_stats)), function(x) { + # (sapply(seq(ncol(data[, covars])), function(x) { #issue: looking in data for unweighted vals but factors have additional vars # sd(as.numeric(data[, covars][, x]), na.rm = TRUE) 
}) *# unweighted covar sd # sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) # exposure SD at that time pt + bal_stats <- bal_stats %>% + dplyr::mutate(std_bal_stats = weighted_bal_stats / + (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have additional vars + sd(as.numeric(data2[, rownames(bal_stats)[x]]), na.rm = TRUE) }) *# unweighted covar sd + sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) # exposure SD at that time pt + + + #temp error warning re: factor w/ multiple levels creating different numbers of variables per history --makes bal_stats a list and breaks std code if(inherits(bal_stats, "list")){ stop("There are unequal numbers of variables across histories, likely due to an ordinal variable with several levels denoted as a factor.", @@ -389,9 +404,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # standardizing balance statistics after weighting by history bal_stats <- bal_stats %>% dplyr::mutate(std_bal_stats = weighted_bal_stats / - (sapply(seq(ncol(data[, covars])), function(x) { - sd(as.numeric(data[, covars][, x]), na.rm = TRUE) # unweighted covar sd - }) * sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) + (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have additional vars + sd(as.numeric(data2[, rownames(bal_stats)[x]]), na.rm = TRUE) }) *# unweighted covar sd + sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) # exposure SD at that time pt + # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 @@ -458,9 +474,22 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(bal_stats) bal_stats$covariate <- rownames(bal_stats) - # Renames factor covariates - bal_stats$covariate[sapply(strsplit(bal_stats$covariate, "_"), "[", 1) %in% factor_covariates] <- - sapply(strsplit(bal_stats$covariate, "_"), "[", 1)[sapply(strsplit(bal_stats$covariate, "_"), "[", 1) %in% factor_covariates] + # # Renames factor covariates + # bal_stats$covariate[sapply(strsplit(bal_stats$covariate, "_"), "[", 1) %in% factor_covariates] <- + # sapply(strsplit(bal_stats$covariate, "_"), "[", 1)[sapply(strsplit(bal_stats$covariate, "_"), "[", 1) %in% factor_covariates] + + #averages across factor levels to create one bal stat per factor variable? 
+ data$ID <- as.numeric(data$ID) + f_vars <- colnames(data)[sapply(data, is.factor)] + f_stats <- bal_stats[rownames(bal_stats)[sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ] + f_stats$name <- sapply(strsplit(rownames(f_stats), "_"), "[", 1) + test <- f_stats %>% dplyr::group_by(name) %>% dplyr::summarize(new = mean(std_bal_stats)) + colnames(test) <- c("covariate", "std_bal_stats") + new <- data.frame(std_bal_stats = test$std_bal_stats, + covariate = test$covariate) + rownames(new) <- new$covariate + bal_stats <- rbind(bal_stats[rownames(bal_stats)[!sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ], + new) #adds custom bal thresh info if (!is.null(imp_conf)){ diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 7ae95feb..2db17b36 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -189,11 +189,13 @@ imputed_data <- readRDS(paste0(home_dir, "/imputed_data.rds")) #place your .rds data <- imputed_data + #optional: extract first imputed dataset for testing library(mice) data <- mice::complete(imputed_data, 1) #just for testing purposes +data$"PmMrSt2" = as.factor(data$"PmMrSt2") # library(dplyr) -# data <- data %>% +# data <- data %>% # dplyr::select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood", "pcx")))) #removing these --came from old dataset ``` From 342dea128e752bc2aeecd33720f5697ce0b368a2 Mon Sep 17 00:00:00 2001 From: Isa Stallworthy <31548151+istallworthy@users.noreply.github.com> Date: Thu, 21 Sep 2023 18:16:22 -0400 Subject: [PATCH 02/12] factor fix --- R/calcBalStats.R | 21 +++++++++++++++------ examplePipelineRevised.Rmd | 5 ++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/R/calcBalStats.R b/R/calcBalStats.R index e2f92740..bc870a64 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -127,8 +127,13 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ } #split factors - data$"ID" = as.numeric(data$"ID") - data2 <- cobalt::splitfactor(data, names(data)[sapply(data, class ) == "factor"], drop.first = FALSE ) + if(length(names(data)[sapply(data, class ) == "factor"]) > 0){ + data$"ID" = as.numeric(data$"ID") + data2 <- cobalt::splitfactor(data, names(data)[sapply(data, class ) == "factor"], drop.first = FALSE ) + } + else{ + data2 <- data + } #creating initial data frames #data frame with all sampling weights for all exposures at all exposure time points for all histories @@ -150,10 +155,14 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ covars <- as.character(unlist(strsplit(covars, "\\+"))) covars <- gsub(" ", "", covars) - #making factor covars separate variables - data_cov <- data[, covars] - data_cov <- cobalt::splitfactor(data_cov, names(data_cov)[sapply(data_cov, class ) == "factor"], drop.first = FALSE ) - covars <- colnames(data_cov) + + if(length(names(data)[sapply(data, class ) == "factor"]) > 0){ + + #making factor covars separate variables + data_cov <- data[, covars] + data_cov <- cobalt::splitfactor(data_cov, names(data_cov)[sapply(data_cov, class ) == "factor"], drop.first = FALSE ) + covars <- colnames(data_cov) + } # GETTING BALANCE STATS FOR T=1 W/ NO EXPOSURE HISTORY (ok to standardize immediately) diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 2db17b36..99ad5deb 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -56,7 +56,7 @@ library(devMSMsHelpers) #prior to re-installing using the code above. Sorry, this is annoying! 
#conducting package checks & tests from documentation --just for IS to run; comment out when testing the workflow -# devtools::check() +devtools::check() ``` @@ -193,7 +193,10 @@ data <- imputed_data #optional: extract first imputed dataset for testing library(mice) data <- mice::complete(imputed_data, 1) #just for testing purposes + +#testing multi-level factors data$"PmMrSt2" = as.factor(data$"PmMrSt2") +data$HomeOwnd = as.factor(data$HomeOwnd) # library(dplyr) # data <- data %>% # dplyr::select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood", "pcx")))) #removing these --came from old dataset From 53cf2305c5eb3b4750c1910a25a8da5dcfb2de55 Mon Sep 17 00:00:00 2001 From: Isa Stallworthy <31548151+istallworthy@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:35:49 -0400 Subject: [PATCH 03/12] tv_confounders optional --- R/assessBalance.R | 14 ++----------- R/createFormulas.R | 24 ++++++++++++---------- examplePipelineRevised.Rmd | 41 ++++++++++++++++++++------------------ man/assessBalance.Rd | 10 ---------- man/createFormulas.Rd | 15 +++++++------- 5 files changed, 44 insertions(+), 60 deletions(-) diff --git a/R/assessBalance.R b/R/assessBalance.R index 2b6355d4..9df8c148 100644 --- a/R/assessBalance.R +++ b/R/assessBalance.R @@ -34,8 +34,6 @@ #' @param exposure_time_pts list of integers at which weights will be #' created/assessed that correspond to time points when exposure wass measured #' @param outcome name of outcome variable with ".timepoint" suffix -#' @param tv_confounders list of time-varying confounders with ".timepoint" -#' suffix #' @param formulas list of balancing formulas at each time point output from #' createFormulas() #' @param weights list of IPTW weights output from createWeights, required for @@ -78,7 +76,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "prebalance", #' formulas = f, #' save.out = FALSE) @@ -86,7 +83,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "prebalance", #' formulas = f, #' balance_thresh = 0.2, @@ -95,7 +91,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "prebalance", #' formulas = f, #' balance_thresh = c(0.1, 0.2), @@ -113,7 +108,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "weighted", #' weights = w, #' formulas = f, @@ -122,7 +116,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "weighted", #' weights = w, #' formulas = f, @@ -132,7 +125,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "weighted", #' weights = w, #' formulas = f, @@ -142,7 +134,7 @@ -assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights = NULL, balance_thresh = 0.1, +assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights = NULL, balance_thresh = 0.1, imp_conf = NULL, verbose = TRUE, save.out = TRUE){ if (save.out) { @@ -167,9 +159,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if 
(missing(exposure_time_pts)){ stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } - if (missing(tv_confounders)){ - stop("Please supply a list of time-varying confounders.", call. = FALSE) - } + if (missing(type)){ stop("Please supply a 'weighted', 'prebalance' type", call. = FALSE) } diff --git a/R/createFormulas.R b/R/createFormulas.R index 3c79aa77..215b811b 100644 --- a/R/createFormulas.R +++ b/R/createFormulas.R @@ -10,9 +10,9 @@ #' @param exposure_time_pts list of integers at which weights will be #' created/assessed that correspond to time points when exposure wass measured #' @param outcome name of outcome variable with ".timepoint" suffix -#' @param tv_confounders list of time-varying confounders with ".timepoint" +#' @param tv_confounders (optional) list of time-varying confounders with ".timepoint" #' suffix -#' @param ti_confounders list of time invariant confounders +#' @param ti_confounders (optional) list of time invariant confounders #' @param type type of formula to create from 'full' (includes all lagged #' time-varying confounders), 'short' (includes time-varying confounders at #' t-1 lag only), or 'update' (adds to 'short' formulas any imbalanced @@ -73,7 +73,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "weighted", #' weights = w, #' formulas = f, @@ -89,7 +88,7 @@ #' save.out = FALSE) -createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, bal_stats = NULL, concur_conf = NULL, +createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = NULL, bal_stats = NULL, concur_conf = NULL, keep_conf = NULL, custom = NULL, verbose = TRUE, save.out = TRUE ){ if (save.out) { @@ -111,10 +110,12 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_co stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } if (missing(tv_confounders)){ - stop("Please supply a list of time-varying confounders.", call. = FALSE) + warning("You have not specified any time-varying confounders. If you have time-varying exposure, please list all wide exposure variables as tv_confounders.", call. = FALSE) + tv_confounders <- character(0) } if (missing(ti_confounders)){ - stop("Please supply a list of time invariant confounders.", call. = FALSE) + stop("You have not specified time invariant confounders.", call. = FALSE) + # ti_confounders <- NULL } if (missing(type)){ stop("Please supply a 'full', 'short', or 'update' type", call. 
= FALSE) @@ -151,12 +152,10 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_co } if(save.out){ - #create parent directory forms_dir1 <- file.path(home_dir, "formulas") if (!dir.exists(forms_dir1)) { dir.create(forms_dir1) } - # Create type directory forms_dir <- file.path(home_dir, "formulas", type) if (!dir.exists(forms_dir)) { dir.create(forms_dir) @@ -178,14 +177,17 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_co message("USER ALERT: Please manually inspect the full balancing formula below:") } time_var_include <- time_varying_covariates[as.numeric(sapply(strsplit(time_varying_covariates, "\\."), "[", 2)) < time] + } else if (type == "short"){ if(verbose){ message("USER ALERT: Please manually inspect the short balancing formula below that includes time-varying confounders at t-1 only:") } + time_var_include <- time_varying_covariates[as.numeric(sapply(strsplit(time_varying_covariates, "\\."), "[", 2)) == exposure_time_pts[x-1]] + } else if (type == "update"){ @@ -208,10 +210,10 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, tv_co dplyr::select(covariate) - # Renames factors (that were appended w/ level) + # # Renames factors (that were appended w/ level) if (nrow(new) > 0) { - new$covariate[sapply(strsplit(new$covariate, "_"), `[`, 1) %in% factor_covariates] <- - sapply(strsplit(new$covariate, "_"), `[`, 1)[sapply(strsplit(new$covariate, "_"), `[`, 1) %in% factor_covariates] + # new$covariate[sapply(strsplit(new$covariate, "_"), `[`, 1) %in% factor_covariates] <- + # sapply(strsplit(new$covariate, "_"), `[`, 1)[sapply(strsplit(new$covariate, "_"), `[`, 1) %in% factor_covariates] new <- as.character(unlist(new)) diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 99ad5deb..95055043 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -56,7 +56,7 @@ library(devMSMsHelpers) #prior to re-installing using the code above. Sorry, this is annoying! #conducting package checks & tests from documentation --just for IS to run; comment out when testing the workflow -devtools::check() +# devtools::check() ``` @@ -120,7 +120,7 @@ Choose from the following preliminary steps to assign to 'data' one of the follo - a mids object (output from mice::mice()) of data imputed in wide format - a list of data imputed in wide format -Data columns shoudl be either numeric or factor form. Note that at present, the code can only take factors with 2 levels. +Data columns should be either numeric or factor form. ### P1. Format Data #### P1a. 
Format single data frame of long data @@ -131,7 +131,7 @@ data_long_f <- formatLongData(home_dir, data_long, exposure, exposure_time_pts, time_var = "TAge", #list original time variable here if it's not "WAVE" id_var = "S_ID", #list original id variable here if it's not "ID" missing = NA, #list missing value here - factor_confounders = c("state","TcBlac2", "PmBlac2", "RHasSO"), + factor_confounders = c("state","TcBlac2", "PmBlac2", "RHasSO", "PmMrSt2", "HomeOwnd"), save.out = TRUE) #list factor variables here @@ -194,7 +194,7 @@ data <- imputed_data library(mice) data <- mice::complete(imputed_data, 1) #just for testing purposes -#testing multi-level factors +#IS testing multi-level factors data$"PmMrSt2" = as.factor(data$"PmMrSt2") data$HomeOwnd = as.factor(data$HomeOwnd) # library(dplyr) @@ -309,6 +309,9 @@ full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, #or minimum inputs full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type) +full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders) + + ``` #### 1b. Exploratory prebalance assessment @@ -327,11 +330,11 @@ type <- "prebalance" formulas <- full_formulas #all inputs -prebalance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, +prebalance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, balance_thresh = balance_thresh, imp_conf = imp_conf, verbose = TRUE, save.out = TRUE) #or minimum inputs -prebalance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas) +prebalance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas) ``` @@ -432,31 +435,31 @@ weights <- weights.ps #all inputs -balance_stats.ps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.ps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) #minimum inputs -balance_stats.ps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights) +balance_stats.ps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights) weights <- weights.cbps -balance_stats.cbps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.cbps <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) weights <- weights.glm -balance_stats.glm <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.glm <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) weights <- weights.gbm -balance_stats.gbm <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.gbm <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) weights <- weights.bart -balance_stats.bart <- assessBalance(home_dir, data, exposure, exposure_time_pts, 
outcome, tv_confounders, type, formulas, weights, +balance_stats.bart <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) weights <- weights.super -balance_stats.super <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats.super <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) ``` @@ -482,11 +485,11 @@ formulas <- full_formulas weights <- weights.ps #all inputs -balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) #minimum inputs -balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights) +balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights) ``` @@ -591,11 +594,11 @@ weights <- trim_weights #all input -final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) #minimum input -final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights) +final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights) ``` #### Sensitvity analyses @@ -603,11 +606,11 @@ final_balance_stats <- assessBalance(home_dir, data, exposure, exposure_time_pts #sensitivity tests weights <- trim_weights.s1 -final_balance_stats.s1 <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +final_balance_stats.s1 <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) weights <- trim_weights.s2 -final_balance_stats.s2 <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, tv_confounders, type, formulas, weights, +final_balance_stats.s2 <- assessBalance(home_dir, data, exposure, exposure_time_pts, outcome, type, formulas, weights, balance_thresh, imp_conf, verbose = TRUE, save.out = TRUE) ``` diff --git a/man/assessBalance.Rd b/man/assessBalance.Rd index 270012d0..d7f22733 100644 --- a/man/assessBalance.Rd +++ b/man/assessBalance.Rd @@ -10,7 +10,6 @@ assessBalance( exposure, exposure_time_pts, outcome, - tv_confounders, type, formulas, weights = NULL, @@ -34,9 +33,6 @@ created/assessed that correspond to time points when exposure wass measured} \item{outcome}{name of outcome variable with ".timepoint" suffix} -\item{tv_confounders}{list of time-varying confounders with ".timepoint" -suffix} - \item{type}{type of balance assessment; 'prebalance' or 'weighted'} \item{formulas}{list of balancing formulas at each time point output from @@ -93,7 +89,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "prebalance", formulas = f, save.out = FALSE) 
@@ -101,7 +96,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "prebalance", formulas = f, balance_thresh = 0.2, @@ -110,7 +104,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "prebalance", formulas = f, balance_thresh = c(0.1, 0.2), @@ -128,7 +121,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f, @@ -137,7 +129,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f, @@ -147,7 +138,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f, diff --git a/man/createFormulas.Rd b/man/createFormulas.Rd index 113670cf..b9ffdf16 100644 --- a/man/createFormulas.Rd +++ b/man/createFormulas.Rd @@ -9,9 +9,9 @@ createFormulas( exposure, exposure_time_pts, outcome, - tv_confounders, - ti_confounders, type, + ti_confounders, + tv_confounders = NULL, bal_stats = NULL, concur_conf = NULL, keep_conf = NULL, @@ -30,16 +30,16 @@ created/assessed that correspond to time points when exposure wass measured} \item{outcome}{name of outcome variable with ".timepoint" suffix} -\item{tv_confounders}{list of time-varying confounders with ".timepoint" -suffix} - -\item{ti_confounders}{list of time invariant confounders} - \item{type}{type of formula to create from 'full' (includes all lagged time-varying confounders), 'short' (includes time-varying confounders at t-1 lag only), or 'update' (adds to 'short' formulas any imbalanced time-varying confounders at lags great than t-1)} +\item{ti_confounders}{(optional) list of time invariant confounders} + +\item{tv_confounders}{(optional) list of time-varying confounders with ".timepoint" +suffix} + \item{bal_stats}{list of balance statistics from assessBalance(), required for 'update' type} @@ -107,7 +107,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f, From addeefc347e208cd94c42247cf3ec27d6e666d0f Mon Sep 17 00:00:00 2001 From: Isa Stallworthy <31548151+istallworthy@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:08:33 -0400 Subject: [PATCH 04/12] prune inputs --- R/compareHistories.R | 13 +++---------- R/eval_hist.R | 6 ++---- examplePipelineRevised.Rmd | 12 ++++++------ man/compareHistories.Rd | 7 ------- man/eval_hist.Rd | 7 +++---- 5 files changed, 14 insertions(+), 31 deletions(-) diff --git a/R/compareHistories.R b/R/compareHistories.R index 92443558..8dc27e86 100644 --- a/R/compareHistories.R +++ b/R/compareHistories.R @@ -25,8 +25,6 @@ #' @param exposure_time_pts list of integers at which weights will be #' created/assessed that correspond to time points when exposure wass measured #' @param outcome name of outcome variable with ".timepoint" suffix -#' @param tv_confounders list of time-varying confounders with ".timepoint" -#' suffix #' @param model list of model outputs from fitModel() #' 
@param epochs (optional) data frame of exposure epoch labels and values #' @param hi_lo_cut (optional) list of two numbers indicating quantile values @@ -92,13 +90,11 @@ #' r <- compareHistories(exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' model = m, #' save.out = FALSE) #' r <- compareHistories(exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' model = m, #' reference = "l-l-l", #' comparison = "h-h-h", @@ -106,14 +102,13 @@ #' r <- compareHistories(exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' model = m, #' reference = "l-l-l", #' comparison = c("h-h-h", "h-l-l"), #' save.out = FALSE) -compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, epochs = NULL, hi_lo_cut = NULL, +compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, model, epochs = NULL, hi_lo_cut = NULL, reference = NA, comparison = NULL, mc_comp_method = "BH", dose_level = "h", exp_lab = NA, out_lab = NA, colors = "Dark2", verbose = TRUE, save.out = TRUE ) { @@ -135,9 +130,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ if (missing(exposure_time_pts)){ stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } - if (missing(tv_confounders)){ - stop("Please supply a list of time-varying confounders.", call. = FALSE) - } + if (missing(model)){ stop("Please supply a list of model output", call. = FALSE) } @@ -168,7 +161,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, tv_ #history inspection --done early bc function deals with epochs, etc. #print history sample distribution if (verbose){ - eval_hist(data = model[[1]]$data, exposure, tv_confounders, epochs, + eval_hist(data = model[[1]]$data, exposure, epochs, exposure_time_pts, hi_lo_cut, reference, comparison, verbose) } diff --git a/R/eval_hist.R b/R/eval_hist.R index 95f4f42b..74f35fea 100644 --- a/R/eval_hist.R +++ b/R/eval_hist.R @@ -58,13 +58,11 @@ #' ref = "l-l-l", #' comps = "h-h-h") -eval_hist <- function(data, exposure, tv_confounders, epochs = NULL, time_pts, hi_lo_cut = NULL, ref = NA, comps = NULL, verbose = TRUE){ +eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, ref = NA, comps = NULL, verbose = TRUE){ exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', time_pts[1])], "numeric"), "continuous", "binary") - time_varying_wide <- tv_confounders - time_varying_wide <- sort(time_varying_wide) - time_varying_wide <- c("ID", time_varying_wide) + data_wide <- data diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 95055043..f7c2b680 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -127,7 +127,7 @@ Data columns should be either numeric or factor form. 
```{r} # if you have long data with wrong time, id, and/or missing indicators and need to format -data_long_f <- formatLongData(home_dir, data_long, exposure, exposure_time_pts, outcome, tv_confounders, +data_long_f <- formatLongData(home_dir, data_long, exposure, exposure_time_pts, outcome, time_var = "TAge", #list original time variable here if it's not "WAVE" id_var = "S_ID", #list original id variable here if it's not "ID" missing = NA, #list missing value here @@ -263,7 +263,7 @@ comparison <- c("h-h-h", "h-h-l") #multiple ```{r} #all inputs -inspectData(data, home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, #required input +inspectData(data, home_dir, exposure, exposure_time_pts, outcome, ti_confounders, #required input epochs, hi_lo_cut, reference, comparison, verbose = TRUE, save.out = TRUE) #optional input @@ -715,14 +715,14 @@ model <- models #output from fitModel #all inputs -results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, +results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, model, epochs, hi_lo_cut, reference, comparison, mc_comp_method , dose_level, exp_lab, out_lab, colors, verbose = FALSE, save.out = TRUE) #minimum inputs reference <- "l-l-l-l-l" comparison <- c("h-h-h-h-h", "l-h-h-h-h") #multiple -results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, +results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, model, reference = reference, comparison = comparison) # for our data, specify ref and comp bc too many comparisons for exposure time point; ``` @@ -731,11 +731,11 @@ results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_c #sensitivity tests model <- models.s1 -results.s1 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, +results.s1 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, model, epochs, hi_lo_cut, reference, comparison, mc_comp_method , dose_level, exp_lab, out_lab, colors, verbose = TRUE, save.out = TRUE) model <- models.s2 -results.s2 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, model, +results.s2 <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, model, epochs, hi_lo_cut, reference, comparison, mc_comp_method , dose_level, exp_lab, out_lab, colors, verbose = TRUE, save.out = TRUE) ``` diff --git a/man/compareHistories.Rd b/man/compareHistories.Rd index 4b3c7325..bca151c9 100644 --- a/man/compareHistories.Rd +++ b/man/compareHistories.Rd @@ -9,7 +9,6 @@ compareHistories( exposure, exposure_time_pts, outcome, - tv_confounders, model, epochs = NULL, hi_lo_cut = NULL, @@ -34,9 +33,6 @@ created/assessed that correspond to time points when exposure wass measured} \item{outcome}{name of outcome variable with ".timepoint" suffix} -\item{tv_confounders}{list of time-varying confounders with ".timepoint" -suffix} - \item{model}{list of model outputs from fitModel()} \item{epochs}{(optional) data frame of exposure epoch labels and values} @@ -121,13 +117,11 @@ m <- fitModel(data = test, r <- compareHistories(exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), model = m, save.out = FALSE) r <- compareHistories(exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), model = m, reference = "l-l-l", comparison = "h-h-h", 
@@ -135,7 +129,6 @@ r <- compareHistories(exposure = "A", r <- compareHistories(exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), model = m, reference = "l-l-l", comparison = c("h-h-h", "h-l-l"), diff --git a/man/eval_hist.Rd b/man/eval_hist.Rd index b96aa825..c892b7dc 100644 --- a/man/eval_hist.Rd +++ b/man/eval_hist.Rd @@ -7,7 +7,6 @@ eval_hist( data, exposure, - tv_confounders, epochs = NULL, time_pts, hi_lo_cut = NULL, @@ -22,9 +21,6 @@ data frames, or mids object} \item{exposure}{name of exposure variable} -\item{tv_confounders}{list of time-varying confounders with ".timepoint" -suffix} - \item{epochs}{(optional) data frame of exposure epoch labels and values} \item{time_pts}{list of integers at which weights will be @@ -43,6 +39,9 @@ reference, required if reference is supplied} \item{verbose}{(optional) TRUE or FALSE indicator for user output (default is TRUE)} + +\item{tv_confounders}{list of time-varying confounders with ".timepoint" +suffix} } \value{ none From 33126d2c2546e0eb23eeeffc95e410edfb7fc320 Mon Sep 17 00:00:00 2001 From: Isa Stallworthy <31548151+istallworthy@users.noreply.github.com> Date: Mon, 25 Sep 2023 12:01:53 -0400 Subject: [PATCH 05/12] pruning dependencies --- R/assessBalance.R | 90 +++++++++++--------- R/calcBalStats.R | 169 ++++++++++++++++++++++--------------- R/compareHelpers.R | 2 +- R/compareHistories.R | 27 ++---- R/createFormulas.R | 27 +++--- R/eval_hist.R | 32 ++++--- R/fitModel.R | 11 +-- R/getModel.R | 17 ++-- R/make_love_plot.R | 4 +- examplePipelineRevised.Rmd | 22 ++--- 10 files changed, 227 insertions(+), 174 deletions(-) diff --git a/R/assessBalance.R b/R/assessBalance.R index 9df8c148..e7f27895 100644 --- a/R/assessBalance.R +++ b/R/assessBalance.R @@ -5,23 +5,6 @@ #' guidelines from Jackson, 2016 on how to assess balance for time-varying #' exposures. 
#' -#' @importFrom knitr kable -#' @importFrom ggplot2 ggplot -#' @importFrom ggplot2 theme -#' @importFrom ggplot2 geom_text -#' @importFrom ggplot2 geom_vline -#' @importFrom ggplot2 geom_point -#' @importFrom ggplot2 xlab -#' @importFrom ggplot2 xlim -#' @importFrom ggplot2 ggtitle -#' @importFrom ggplot2 scale_y_discrete -#' @importFrom ggplot2 element_text -#' @importFrom ggplot2 element_rect -#' @importFrom ggplot2 element_blank -#' @importFrom ggplot2 guide_axis -#' @importFrom ggplot2 ggsave -#' @importFrom dplyr bind_rows -#' @importFrom dplyr %>% #' @seealso {[cobalt] package, #' } #' @seealso {Jackson, 2016 for more on assessing balance for time-varying @@ -315,15 +298,24 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, #adds custom bal thresh info if (!is.null(imp_conf)){ - all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, - balance_thresh[1], balance_thresh[2])) - all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - all_bal_stats$bal_thresh, 1, 0) ) + # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, + # balance_thresh[1], balance_thresh[2])) + all_bal_stats$bal_thresh <- ifelse(all_bal_stats$covariate %in% imp_conf, + balance_thresh[1], balance_thresh[2]) + + # all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < + # all_bal_stats$bal_thresh, 1, 0) ) + all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < + all_bal_stats$bal_thresh, 1, 0) } else{ - all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh) - all_bal_stats <- all_bal_stats %>% dplyr::mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - all_bal_stats$bal_thresh, 1, 0) ) + # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh) + all_bal_stats$bal_thresh <- balance_thresh + # + # all_bal_stats <- all_bal_stats %>% dplyr::mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < + # all_bal_stats$bal_thresh, 1, 0) ) + all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < + all_bal_stats$bal_thresh, 1, 0) } cat("\n") @@ -447,15 +439,24 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, # #adds custom bal thresh info if (!is.null(imp_conf)){ - all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, - balance_thresh[1], balance_thresh[2])) - all_bal_stats <- all_bal_stats %>%dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - all_bal_stats$bal_thresh, 1, 0) ) + # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, + # balance_thresh[1], balance_thresh[2])) + all_bal_stas$bal_thresh <-ifelse(all_bal_stats$covariate %in% imp_conf, + balance_thresh[1], balance_thresh[2]) + + # all_bal_stats <- all_bal_stats %>%dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < + # all_bal_stats$bal_thresh, 1, 0) ) + all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < + all_bal_stats$bal_thresh, 1, 0) } else{ - all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh) - all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - all_bal_stats$bal_thresh, 1, 0) ) + # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh) + all_bal_stats$bal_thresh <- balance_thresh + + # all_bal_stats <- 
all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < + # all_bal_stats$bal_thresh, 1, 0) ) + all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < + all_bal_stats$bal_thresh, 1, 0) } cat("\n") @@ -515,7 +516,8 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), "continuous", "binary") } - temp <- all_bal_stats %>% filter(exp_time == exposure_time_pt) + # temp <- all_bal_stats %>% filter(exp_time == exposure_time_pt) + temp <- all_bal_stats[all_bal_stats$exp_time == exposure_time_pt, ] make_love_plot(home_dir, folder, exposure, exposure_time_pt, exposure_type, k = 0, form_name, temp, data_type, balance_thresh, weights_method, imp_conf, verbose, save.out) @@ -538,7 +540,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, } else{ cat(paste0("Summary plots for ", form_name, " ", exposure, - " have now been saved out for each time point in the 'balance/", type, "/plots/' folder."), "\n") + " have now been saved out for each time point in the 'balance/", type, "/plots/' folder."), "\n") } } } @@ -563,8 +565,10 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, # Finding all imbalanced variables - unbalanced_covars <- all_bal_stats %>% - filter(balanced == 0) + # unbalanced_covars <- all_bal_stats %>% + # filter(balanced == 0) + unbalanced_covars <- all_bal_stats[all_bal_stats$balanced == 0, ] + unbalanced_constructs <- sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1)[!duplicated(sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1))] @@ -583,8 +587,12 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", - exposure, " of ", round(median(abs(as.numeric(unlist(unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% - dplyr:: select(avg_bal))))), 2), " (range=", + exposure, " of ", round(median(abs(as.numeric(unlist( + # unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% + # dplyr:: select(avg_bal) + + unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"] + )))), 2), " (range=", round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") } else{ cat("There are no imbalanced covariates.", "\n") @@ -603,8 +611,12 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", - exposure, " of ", round(median(abs(as.numeric(unlist(unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% - dplyr:: select(avg_bal))))), 2), " (range=", + exposure, " of ", round(median(abs(as.numeric(unlist( + # unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% + # dplyr:: select(avg_bal) + 
unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"] + + )))), 2), " (range=", round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") } else{ cat("There are no imbalanced covariates.", "\n") diff --git a/R/calcBalStats.R b/R/calcBalStats.R index bc870a64..27d34dae 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -111,7 +111,8 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (weighted == 1){ weights_method = weights$method w <- weights$weights #IPTW weights - data <- data %>% dplyr::mutate(weights = as.numeric(w)) + # data <- data %>% dplyr::mutate(weights = as.numeric(w)) + data$weights <- as.numeric(w) } else{ weights_method <- "no weights" @@ -265,9 +266,12 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ } # ends history loop (e.g., "l-l-l") #summarize contributors to each history - prop_sum <- prop_weights %>% - dplyr::group_by(as.factor(history)) %>% - dplyr::summarize(n = dplyr::n()) + # prop_sum <- prop_weights %>% + # dplyr::group_by(as.factor(history)) %>% + # dplyr::summarize(n = dplyr::n()) + + prop_sum <- aggregate(exposure ~ as.factor(history), data = prop_weights, + FUN = function(x) n = length(x)) # GET BALANCE STATISTICS FOR T>1 (when there is a history to weight on) if (length(lagged_time_pts) > 0) { @@ -299,8 +303,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (weighted == 0) { # no IPTW weighting but weighting on history if (exposure_type == "continuous") { bal_stats <- sapply(sort(unique(temp$history)), function(i) { # finding balance by history - temp2 <- temp %>% - dplyr::filter(history == i) + # temp2 <- temp %>% + # dplyr::filter(history == i) + temp2 <- temp[temp$history == i, ] #should be same length as covars (already have factors split up) cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance @@ -321,11 +326,12 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # sd(as.numeric(data[, covars][, x]), na.rm = TRUE) }) *# unweighted covar sd # sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) # exposure SD at that time pt - bal_stats <- bal_stats %>% - dplyr::mutate(std_bal_stats = weighted_bal_stats / - (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have additional vars - sd(as.numeric(data2[, rownames(bal_stats)[x]]), na.rm = TRUE) }) *# unweighted covar sd - sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) # exposure SD at that time pt + + # bal_stats <- bal_stats %>% + bal_stats$std_bal_stats <- weighted_bal_stats / + (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have additional vars + sd(as.numeric(data2[, rownames(bal_stats)[x]]), na.rm = TRUE) }) *# unweighted covar sd + sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE)) # exposure SD at that time pt @@ -335,14 +341,18 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ call. 
= FALSE) } - bal_stats <- bal_stats %>% - dplyr::select(contains(c("std"))) + # bal_stats <- bal_stats %>% + # dplyr::select(contains(c("std"))) + bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) + + } #ends continuous else if (exposure_type == "binary") { bal_stats <- sapply(sort(unique(temp$history)), function(i) { - temp2 <- temp %>% - dplyr::filter(history == i) + # temp2 <- temp %>% + # dplyr::filter(history == i) + temp2 <- temp[temp$history == i, ] cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference subset = temp2$history[temp2$history == i] == i) # subsetting by that history @@ -356,16 +366,16 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after finding weighted balance stats - bal_stats <- bal_stats %>% - dplyr::mutate(std_bal_stats = weighted_bal_stats/ - sapply(seq(ncol(data[, covars])), function(x){ - sqrt(mean( #dividing by pool SD estimate (unadjusted) - var(as.numeric(data[data[, (colnames(data) %in% paste0(exposure, ".", - exposure_time_pts[1]))] == 1 , colnames(data) %in% covars[x]])), #treated var - var(as.numeric(data[data[, (colnames(data) == paste0(exposure, ".", - exposure_time_pts[1]))] == 0 , colnames(data) %in% covars[x]])) #untreated var - )) - })) + # bal_stats <- bal_stats %>% + bal_stats$std_bal_stats <- weighted_bal_stats/ + sapply(seq(ncol(data[, covars])), function(x){ + sqrt(mean( #dividing by pool SD estimate (unadjusted) + var(as.numeric(data[data[, (colnames(data) %in% paste0(exposure, ".", + exposure_time_pts[1]))] == 1 , colnames(data) %in% covars[x]])), #treated var + var(as.numeric(data[data[, (colnames(data) == paste0(exposure, ".", + exposure_time_pts[1]))] == 0 , colnames(data) %in% covars[x]])) #untreated var + )) + }) #temp error warning re: factor w/ multiple levels creating different numbers of variables per history --makes bal_stats a list and breaks std code if(inherits(bal_stats, "list")){ @@ -377,8 +387,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 - bal_stats <- bal_stats %>% - dplyr::select(contains(c("std"))) + # bal_stats <- bal_stats %>% + # dplyr::select(contains(c("std"))) + bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) + } #ends binary } #ends weighted=0 @@ -389,8 +401,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (exposure_type == "continuous") { # finds balance for each covariate clustered/subset by history bal_stats <- sapply(sort(unique(temp$history)), function(i) { - temp2 <- temp %>% - dplyr::filter(history == i) + # temp2 <- temp %>% + # dplyr::filter(history == i) + temp2 <- temp[temp$history == i, ] cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance subset = temp2$history[temp2$history == i] == i, # subsetting by that history @@ -411,25 +424,28 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after weighting by history - bal_stats <- bal_stats %>% - dplyr::mutate(std_bal_stats = weighted_bal_stats / - (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have 
additional vars - sd(as.numeric(data2[, rownames(bal_stats)[x]]), na.rm = TRUE) }) *# unweighted covar sd - sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) # exposure SD at that time pt + # bal_stats <- bal_stats %>% + bal_stats$std_bal_stats <- weighted_bal_stats / + (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have additional vars + sd(as.numeric(data2[, rownames(bal_stats)[x]]), na.rm = TRUE) }) *# unweighted covar sd + sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE)) # exposure SD at that time pt # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 - bal_stats <- bal_stats %>% - dplyr::select(contains(c("std"))) + # bal_stats <- bal_stats %>% + # dplyr::select(contains(c("std"))) + bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) + } #ends continuous else if (exposure_type == "binary") { # finds balance for each covariate clustered/subset by history bal_stats <- sapply(sort(unique(temp$history)), function(i) { - temp2 <- temp %>% - dplyr::filter(history == i) + # temp2 <- temp %>% + # dplyr::filter(history == i) + temp2 <- temp[temp$history == i, ] cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference subset = temp2$history[temp2$history == i] == i, # subsetting by that history @@ -450,16 +466,16 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after finding weighted balance stats - bal_stats <- bal_stats %>% - dplyr::mutate(std_bal_stats = weighted_bal_stats/ - sapply(seq(ncol(data[, covars])), function(x){ - sqrt(mean( #dividing by pool SD estimate (unadjusted) - var(as.numeric(data[data[, (colnames(data) %in% paste0(exposure, ".", - exposure_time_pts[1]))] == 1 , colnames(data) %in% covars[x]])), #treated var - var(as.numeric(data[data[, (colnames(data) == paste0(exposure, ".", - exposure_time_pts[1]))] == 0 , colnames(data) %in% covars[x]])) #untreated var - )) - })) + # bal_stats <- bal_stats %>% + bal_stats$std_bal_stats <- weighted_bal_stats/ + sapply(seq(ncol(data[, covars])), function(x){ + sqrt(mean( #dividing by pool SD estimate (unadjusted) + var(as.numeric(data[data[, (colnames(data) %in% paste0(exposure, ".", + exposure_time_pts[1]))] == 1 , colnames(data) %in% covars[x]])), #treated var + var(as.numeric(data[data[, (colnames(data) == paste0(exposure, ".", + exposure_time_pts[1]))] == 0 , colnames(data) %in% covars[x]])) #untreated var + )) + }) # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 @@ -468,8 +484,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 - bal_stats <- bal_stats %>% - dplyr::select(contains(c("std"))) + # bal_stats <- bal_stats %>% + # dplyr::select(contains(c("std"))) + bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) + } #ends binary } #ends weighted @@ -492,7 +510,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ f_vars <- colnames(data)[sapply(data, is.factor)] f_stats <- bal_stats[rownames(bal_stats)[sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ] f_stats$name <- 
sapply(strsplit(rownames(f_stats), "_"), "[", 1) - test <- f_stats %>% dplyr::group_by(name) %>% dplyr::summarize(new = mean(std_bal_stats)) + # test <- f_stats %>% dplyr::group_by(name) %>% dplyr::summarize(new = mean(std_bal_stats)) + test <- aggregate(std_bal_stats ~ name, data = f_stats, + FUN = function(x) new = mean(x)) + colnames(test) <- c("covariate", "std_bal_stats") new <- data.frame(std_bal_stats = test$std_bal_stats, covariate = test$covariate) @@ -502,21 +523,28 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ #adds custom bal thresh info if (!is.null(imp_conf)){ - bal_stats <- bal_stats %>% - dplyr::mutate(bal_thresh = ifelse(bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2])) - bal_stats <- bal_stats %>% - dplyr:: mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) ) + # bal_stats <- bal_stats %>% + # dplyr::mutate(bal_thresh = ifelse(bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2])) + bal_stats$bal_thresh <- ifelse(bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]) + # bal_stats <- bal_stats %>% + # dplyr:: mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) ) + bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) } else{ - bal_stats <- bal_stats %>% - dplyr::mutate(bal_thresh = balance_thresh) - bal_stats <- bal_stats %>% - dplyr:: mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) ) + # bal_stats <- bal_stats %>% + # dplyr::mutate(bal_thresh = balance_thresh) + bal_stats$bal_thresh <- balance_thresh + # bal_stats <- bal_stats %>% + # dplyr:: mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) ) + bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) } - bal_stats <- bal_stats %>% - dplyr::mutate(exposure = exposure, exp_time = exposure_time_pt) %>% - dplyr::mutate(covar_time = sapply(strsplit(covariate, "\\."), "[", 2)) + # bal_stats <- bal_stats %>% + # dplyr::mutate(exposure = exposure, exp_time = exposure_time_pt) %>% + # dplyr::mutate(covar_time = sapply(strsplit(covariate, "\\."), "[", 2)) + bal_stats$exposure <- exposure + bal_stats$exp_time <- exposure_time_pt + bal_stats$covar_time <- sapply(strsplit(bal_stats$covariate, "\\."), "[", 2) all_bal_stats <- rbind(all_bal_stats, bal_stats) all_bal_stats$covar_time[is.na(all_bal_stats$covar_time)] <- 0 @@ -542,11 +570,18 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # Summarizing balance - bal_summary_exp <- all_bal_stats %>% - dplyr::group_by(exp_time) %>% - dplyr::summarize(balanced_n = sum(balanced == 1), # Tallying balanced covariates - imbalanced_n = sum(balanced == 0), # Tallying imbalanced covariates - n = dplyr::n()) + # bal_summary_exp <- all_bal_stats %>% + # dplyr::group_by(exp_time) %>% + # dplyr::summarize(balanced_n = sum(balanced == 1), # Tallying balanced covariates + # imbalanced_n = sum(balanced == 0), # Tallying imbalanced covariates + # n = dplyr::n()) + + bal_summary_exp <- as.data.frame(aggregate(balanced ~ exp_time, data = all_bal_stats, + FUN = function(x) c(balanced_n = sum(x == 1), + imbalanced_n = sum(x == 0), + n = length(x)) )) + bal_summary_exp <- do.call(data.frame, bal_summary_exp) + colnames(bal_summary_exp) <- c("exp_time", "balanced_n", "imbalanced_n", "n") if (save.out){ diff --git a/R/compareHelpers.R b/R/compareHelpers.R index a5a96cc7..68973bfd 
100644 --- a/R/compareHelpers.R +++ b/R/compareHelpers.R @@ -209,7 +209,7 @@ perform_multiple_comparison_correction <- function(comps, reference, comp_histor cat("\n") cat(paste0("Conducting multiple comparison correction using the ", method, " method."), "\n") cat("\n") - corr_p <-stats::p.adjust(comps$p.value, method = method) + corr_p <- stats::p.adjust(comps$p.value, method = method) comps <- cbind(comps, p.value_corr = corr_p) } else { diff --git a/R/compareHistories.R b/R/compareHistories.R index 8dc27e86..09d60493 100644 --- a/R/compareHistories.R +++ b/R/compareHistories.R @@ -4,16 +4,6 @@ #' histories (pooling for imputed data), before conducting contrast comparisons #' (pooling for imputed data), correcting for multiple comparisons, and then #' plotting results. - -#' @importFrom gtools permutations -#' @importFrom marginaleffects avg_predictions -#' @importFrom marginaleffects hypotheses -#' @importFrom stringr str_count -#' @importFrom dplyr filter -#' @importFrom stargazer stargazer -#' @importFrom mice pool -#' @importFrom stats p.adjust -#' @importFrom knitr kable #' @seealso {[marginaleffects::avg_predictions()], #' } #' @seealso {[marginaleffects::hypotheses()], @@ -372,8 +362,9 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod # If the user specified reference and comparison groups, subset pred_pool for inspection and plotting if (!is.na(reference) & !is.null(comp_histories)) { - preds_pool <- preds_pool %>% - dplyr::filter(history %in% c(reference, comp_histories)) + # preds_pool <- preds_pool %>% + # dplyr::filter(history %in% c(reference, comp_histories)) + preds_pool <- preds_pool[preds_pool$history %in% c(reference, comp_histories), ] } @@ -434,8 +425,6 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod })) - - if (save.out){ if(is.null(hi_lo_cut)){ hi_lo_cut <- "median+-.1" @@ -479,8 +468,9 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod # If the user specified reference and comparison groups, subset preds for inspection and plotting if (!is.na(reference) & !is.null(comp_histories)) { - preds <- preds %>% - dplyr::filter(history %in% c(reference, comp_histories)) + # preds <- preds %>% + # dplyr::filter(history %in% c(reference, comp_histories)) + preds <- preds[preds$history %in% c(reference, comp_histories), ] } @@ -594,8 +584,9 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod comparisons$history <- as.factor(comparisons$history) comparisons$dose <- as.factor(comparisons$dose) - comparisons <- comparisons %>% - dplyr::arrange(dose) # Order by dose + # comparisons <- comparisons %>% + # dplyr::arrange(dose) # Order by dose + comparisons <- comparisons[order(comparisons$dose), ] if (length(colors) > 1) { # If user input a list of colors p <- ggplot2::ggplot(data = comparisons, aes(x = estimate, y = history, color = dose)) + diff --git a/R/createFormulas.R b/R/createFormulas.R index 215b811b..e7042cf8 100644 --- a/R/createFormulas.R +++ b/R/createFormulas.R @@ -120,13 +120,13 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, if (missing(type)){ stop("Please supply a 'full', 'short', or 'update' type", call. = FALSE) } - - if(!is.character(type)){ + else if(!is.character(type)){ stop("Please provide a character string type from the following list: 'short', 'full', or 'update'", call. = FALSE) } - if(! 
type %in% c("short", "full", "update")){ - stop("Please provide a type from the following list: 'short', 'full', or 'update'", call. = FALSE) + else if(length(type) != 1 || ! type %in% c("short", "full", "update")){ + stop("Please provide a single type from the following list: 'short', 'full', or 'update'", call. = FALSE) } + if (type != "update" & !is.null(bal_stats)){ stop ("Please only provide balance statistics for the type 'update'.", call. = FALSE) } @@ -203,17 +203,20 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, exposure_time_pts[x-1]] if (x > 1) { - new <- bal_stats %>% - dplyr::filter(balanced == 0) %>% - dplyr::filter(exp_time == time, as.numeric(covar_time) < exposure_time_pts[x - 1], - as.numeric(covar_time) > 0) %>% # Finds any lagged imbalanced covars - dplyr::select(covariate) + # new <- bal_stats %>% + # dplyr::filter(balanced == 0) %>% + # dplyr::filter(exp_time == time, as.numeric(covar_time) < exposure_time_pts[x - 1], + # as.numeric(covar_time) > 0) %>% # Finds any lagged imbalanced covars + # dplyr::select(covariate) + + new <- bal_stats[bal_stats$balanced ==0 & + bal_stats$exp_time == time & + as.numeric(bal_stats$covar_time) < exposure_time_pts[x-1] & + as.numeric(bal_stats$covar_time) > 0, ] + new <- new[, "covariate", drop = FALSE] #keep data frame structure so nrow() check below still works - # # Renames factors (that were appended w/ level) if (nrow(new) > 0) { - # new$covariate[sapply(strsplit(new$covariate, "_"), `[`, 1) %in% factor_covariates] <- - # sapply(strsplit(new$covariate, "_"), `[`, 1)[sapply(strsplit(new$covariate, "_"), `[`, 1) %in% factor_covariates] new <- as.character(unlist(new)) diff --git a/R/eval_hist.R b/R/eval_hist.R index 74f35fea..3d2d378b 100644 --- a/R/eval_hist.R +++ b/R/eval_hist.R @@ -61,8 +61,6 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, ref = NA, comps = NULL, verbose = TRUE){ exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', time_pts[1])], "numeric"), "continuous", "binary") - - data_wide <- data @@ -86,18 +84,24 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, # Finds data from each time point in each epoch, horizontally aligns all exposure values within the epoch for averaging for (l in seq_len(length(as.numeric(unlist(epochs[e, 2]))))) { level <- as.numeric(unlist(epochs[e, 2]))[l] - z <- data_wide[, names(data_wide)[grepl(exposure, names(data_wide))]] #finds exposure vars - z <- as.numeric(as.character(unlist(z[, sapply(strsplit(names(z), "\\."), "[", 2) == as.character(level)]))) + z <- as.data.frame(data_wide[, names(data_wide)[grepl(exposure, names(data_wide))]]) #finds exposure vars + cols <- colnames(z)[as.logical(sapply(strsplit(names(z), "\\."), "[", 2) == as.character(level))] + cols <- cols[!is.na(cols)] + z <- as.numeric(as.character(unlist(z[, cols]))) temp <- cbind(temp, z) } - new <- new %>% - dplyr::mutate(!!new_var := rowMeans(temp, na.rm = TRUE)) + # new <- new %>% + x <- as.data.frame(rowMeans(temp, na.rm = TRUE)) + colnames(x) <- c(new_var) + new <- cbind(new, x) + # dplyr::mutate(!!new_var := rowMeans(temp, na.rm = TRUE)) } } if(verbose){ cat("Summary of Exposure Main Effects:", "\n") - print(psych::describe(new %>% select(-c("ID")), fast = TRUE)) + print(summary(new[, !colnames(new) %in% "ID"])) #print() needed for output to display inside the function + # print(psych::describe(new %>% select(-c("ID")), fast = TRUE)) cat("\n") } @@ -187,13 +191,17 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, } # Summarizing n's by history - his_summ <- new %>% - dplyr::group_by(history) %>% -
dplyr::summarize(n = dplyr::n()) + # his_summ <- new %>% + # dplyr::group_by(history) %>% + # dplyr::summarize(n = dplyr::n()) + new$history <- unlist(new$history) + his_summ <- aggregate( ID ~ history, data = new, + FUN = function(x) n = length(x)) if( !is.na(ref) & !is.null(comps)){ - his_summ <- his_summ %>% - dplyr::filter(history %in% c(ref, comps)) + # his_summ <- his_summ %>% + # dplyr::filter(history %in% c(ref, comps)) + his_summ <- his_summ[his_summ$history %in% c(ref, comps), ] } his_summ <- his_summ[! grepl("NA", his_summ$history),] diff --git a/R/fitModel.R b/R/fitModel.R index a48c631e..27abfe9e 100644 --- a/R/fitModel.R +++ b/R/fitModel.R @@ -3,9 +3,6 @@ #' Fits weighted marginal outcome model as a generalized linear model of the #' user's choosing, relating exposure main effects to outcome using IPTW #' weights. -#' @importFrom survey svyglm -#' @importFrom jtools export_summs -#' @importFrom dplyr mutate filter select #' @seealso {[survey::svyglm()] for more on family/link specifications, #' } #' @param home_dir path to home directory (required if 'save.out' = TRUE) @@ -213,7 +210,7 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if (mice::is.mids(data)){ #imputed dataset fits <- lapply(seq_len(data$m), function(y) { - d <- complete(data, y) + d <- mice::complete(data, y) d$weights <- NULL d$weights <- weights[[y]]$weights @@ -221,7 +218,7 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco }) fits.null <- lapply(seq_len(data$m), function(y) { - d <- complete(data, y) + d <- mice::complete(data, y) d$weights <- NULL d$weights <- weights[[y]]$weights @@ -325,8 +322,8 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco } if (save.out){ - requireNamespace(officer) #is there another way to do this? required for writing to word - requireNamespace(flextable) # " " + # requireNamespace(officer) #is there another way to do this?
required for writing to word + # requireNamespace(flextable) # " " suppressWarnings(jtools::export_summs(fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")))) diff --git a/R/getModel.R b/R/getModel.R index e5d6aa8b..e25a3329 100644 --- a/R/getModel.R +++ b/R/getModel.R @@ -1,8 +1,6 @@ #' Fits outcome model #' -#' @importFrom survey svyglm -#' @importFrom survey svydesign #' @param d wide data frame #' @param exposure name of exposure variable #' @param exposure_time_pts list of integers at which weights will be @@ -89,11 +87,17 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs for (l in seq_len(length(as.numeric(unlist(epochs[e, 2]))))){ level <- as.numeric(unlist(epochs[e, 2]))[l] z <- d[, names(d)[grepl(exposure, names(d))]] #finds exposure vars - z <- as.numeric(as.character(unlist(z[, sapply(strsplit(names(z), "\\."), "[", 2) == as.character(level)]))) + cols <- colnames(z)[as.logical(sapply(strsplit(names(z), "\\."), "[", 2) == as.character(level))] + cols <- cols[!is.na(cols)] + z <- as.numeric(as.character(unlist(z[, cols]))) temp <- cbind(temp, z) } #adds a new variable of the exposure averaged within epoch - d <- d %>% dplyr::mutate(!!new_var := rowMeans(temp, na.rm=T)) + # d <- d %>% dplyr::mutate(!!new_var := rowMeans(temp, na.rm=T)) + + x <- as.data.frame(rowMeans(temp, na.rm = TRUE)) + colnames(x) <- c(new_var) + d <- cbind(d, x) d[, new_var] <- as.numeric(d[, new_var]) } } @@ -129,7 +133,10 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs name <- gsub(" ", "", unlist(strsplit(interactions, "\\+"))[x]) if (! name %in% colnames(d)){ temp <- d[, c(gsub(" ", "", as.character(unlist(strsplit(unlist(strsplit(interactions, "\\+"))[x], "\\:"))))) ] - d <- d %>% dplyr::mutate(!! name := matrixStats::rowProds(as.matrix(temp), na.rm = T)) + # d <- d %>% dplyr::mutate(!! 
name := matrixStats::rowProds(as.matrix(temp), na.rm = T)) + # d <- d %>% dplyr::mutate(!! name := matrixStats::rowProds(as.matrix(temp), na.rm = T)) + new <- matrixStats::rowProds(as.matrix(temp), na.rm = T) + d <- cbind(d, new) + colnames(d)[ncol(d)] <- name #name the added column after the interaction term rather than "new" } } } diff --git a/R/make_love_plot.R b/R/make_love_plot.R index 3c6f7457..d83bb653 100644 --- a/R/make_love_plot.R +++ b/R/make_love_plot.R @@ -51,7 +51,6 @@ #' exposure = "A", #' exposure_time_pts = c(1, 2, 3), #' outcome = "D.3", -#' tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), #' type = "weighted", #' weights = w, #' formulas = f, @@ -88,7 +87,8 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur stat_var <- colnames(balance_stats)[grepl("_bal", colnames(balance_stats))] colnames(balance_stats)[colnames(balance_stats) == stat_var] <- "avg_bal" - balance_stats <- balance_stats %>% dplyr::arrange(avg_bal) + # balance_stats <- balance_stats %>% dplyr::arrange(avg_bal) + balance_stats <- balance_stats[order(balance_stats$avg_bal), ] x_lab <- ifelse(exposure_type == "continuous", "Correlation with Exposure", "Standardized Mean Difference Between Exposures") diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index f7c2b680..8a48982d 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -75,6 +75,7 @@ exposure_time_pts <- c(6, 15, 24, 35, 58) outcome <- "EF_avg_perc.58" +#these are now optional tv_confounders <- c("SAAmylase.6","SAAmylase.15", "SAAmylase.24", "MDI.6", "MDI.15", # "RHasSO.6", "RHasSO.15", "RHasSO.24", #removed for low variability @@ -303,13 +304,11 @@ custom <- full_formulas type = "full" #all inputs -full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, +full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders, concur_conf = concur_conf, keep_conf = NULL, custom = custom, verbose = TRUE, save.out = TRUE) #or minimum inputs -full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type) - -full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders) +full_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders) ``` @@ -364,11 +363,11 @@ custom <- NULL type <- "short" #all inputs -short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, +short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders, concur_conf = concur_conf, keep_conf = keep_conf, custom = custom, verbose = TRUE, save.out = TRUE) #or minimum inputs -short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type) +short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders) ``` @@ -516,11 +515,12 @@ type <- "update" bal_stats <- balance_stats #all inputs -updated_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders, type, bal_stats, +updated_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders, concur_conf, keep_conf, verbose = TRUE, save.out = TRUE) #minimum inputs -updated_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, tv_confounders,
ti_confounders, type, bal_stats) +updated_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome, type, ti_confounders, tv_confounders = tv_confounders, + bal_stats = bal_stats) ``` @@ -652,7 +652,7 @@ models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome #minimum inputs model <- "m0" -models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, model) +models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, model, epochs = epochs) #some optional inputs models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, model, @@ -722,8 +722,8 @@ results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, mode #minimum inputs reference <- "l-l-l-l-l" comparison <- c("h-h-h-h-h", "l-h-h-h-h") #multiple -results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, model, - reference = reference, comparison = comparison) # for our data, specify ref and comp bc too many comparisons for exposure time point; +results <- compareHistories(home_dir, exposure, exposure_time_pts, outcome, model, epochs = epochs) + # reference = reference, comparison = comparison) # for our data, specify ref and comp bc too many comparisons for exposure time point; ``` ##### Sensitvity analyses From e76d1efe75f73643c9b05daffc7da422cebc6a54 Mon Sep 17 00:00:00 2001 From: Isa Stallworthy <31548151+istallworthy@users.noreply.github.com> Date: Mon, 25 Sep 2023 15:47:02 -0400 Subject: [PATCH 06/12] imports and error checking --- DESCRIPTION | 1 - R/assessBalance.R | 50 ++++++++++++++++++++++++++++++-------- R/calcBalStats.R | 29 ---------------------- R/compareHistories.R | 31 +++++++++++++++++++++++- R/createFormulas.R | 40 ++++++++++++++++++++++++++++++- R/createWeights.R | 51 +++++++++++++++++++++++++++------------ R/fitModel.R | 57 +++++++++++++++++++++++++++++++++++++------- R/trimWeights.R | 38 ++++++++++++++++++++++++++--- 8 files changed, 229 insertions(+), 68 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a073924f..51d1ff67 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,6 @@ LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.2 Imports: - dplyr, WeightIt, marginaleffects, tidyr, diff --git a/R/assessBalance.R b/R/assessBalance.R index e7f27895..2072373e 100644 --- a/R/assessBalance.R +++ b/R/assessBalance.R @@ -124,6 +124,9 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } @@ -133,24 +136,45 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } + else if (!mice::is.mids(data) & !is.data.frame(data) & !inherits(data, "list")) { + stop("Please provide wide data as a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE) + } + + if (missing(exposure)){ stop("Please supply a single exposure.", call. 
= FALSE) } + else if(!is.character(exposure) | length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } + else if(!is.character(outcome) | length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } + if (missing(exposure_time_pts)){ stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } - - if (missing(type)){ - stop("Please supply a 'weighted', 'prebalance' type", call. = FALSE) + else if(!is.numeric(exposure_time_pts)){ + stop("Please supply a list of exposure time points as integers.", call. = FALSE) } + if (missing(formulas)){ stop("Please supply a list of balancing formulas.", call. = FALSE) } + else if(!inherits(formulas, "list")){ + stop("Please provide a list of formulas for each exposure time point", call. = FALSE) + } + else if(length(formulas) != length(exposure_time_pts)){ + stop("Please provide a list of formulas for each exposure time point", call. = FALSE) + } - + if (missing(type)){ + stop("Please supply a 'weighted', 'prebalance' type", call. = FALSE) + } if (!inherits(type, "character") | length(type) != 1 ){ stop("Please provide a single type as a character string from the following list: 'prebalance', 'weighted'", call. = FALSE) } @@ -164,10 +188,6 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("The 'weighted' mode of this function requires weights be supplied in the form of output from createWeights.", call. = FALSE) } - if (!mice::is.mids(data) & !is.data.frame(data) & !inherits(data, "list")) { - stop("Please provide either a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE) - } - if (!is.null(weights) & ! inherits(weights, "list")){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } @@ -186,8 +206,18 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("Please provide a list variable names as characters that are important confounders.", call. = FALSE) } - if(!inherits(formulas, "list")){ - stop("Please provide a list of formulas for each exposure time point", call. = FALSE) + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) + } + + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) + } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) } diff --git a/R/calcBalStats.R b/R/calcBalStats.R index 27d34dae..d65c0268 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -5,22 +5,6 @@ #' approaches to assessing balance for time-varying exposures by weighting #' statistics based on sample distribution in exposure histories. 
#' -#' @importFrom ggplot2 ggplot -#' @importFrom ggplot2 theme -#' @importFrom ggplot2 geom_text -#' @importFrom ggplot2 geom_vline -#' @importFrom ggplot2 geom_point -#' @importFrom ggplot2 xlab -#' @importFrom ggplot2 aes -#' @importFrom ggplot2 xlim -#' @importFrom ggplot2 ggtitle -#' @importFrom ggplot2 scale_y_discrete -#' @importFrom ggplot2 element_text -#' @importFrom ggplot2 element_rect -#' @importFrom ggplot2 element_blank -#' @importFrom ggplot2 guide_axis -#' @importFrom ggplot2 ggsave -#' @importFrom stargazer stargazer #' @param home_dir (optional) path to home directory (required if save.out = #' TRUE) #' @param data data in wide format as: a data frame, path to folder of imputed @@ -111,7 +95,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (weighted == 1){ weights_method = weights$method w <- weights$weights #IPTW weights - # data <- data %>% dplyr::mutate(weights = as.numeric(w)) data$weights <- as.numeric(w) } else{ @@ -319,14 +302,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) - # standardizing balance statistics after weighting by history - # bal_stats <- bal_stats %>% - # dplyr::mutate(std_bal_stats = weighted_bal_stats / - # (sapply(seq(ncol(data[, covars])), function(x) { #issue: looking in data for unweighted vals but factors have additional vars - # sd(as.numeric(data[, covars][, x]), na.rm = TRUE) }) *# unweighted covar sd - # sd(data[, paste0(exposure, ".", exposure_time_pt)], na.rm = TRUE))) # exposure SD at that time pt - - # bal_stats <- bal_stats %>% bal_stats$std_bal_stats <- weighted_bal_stats / (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have additional vars @@ -501,10 +476,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(bal_stats) bal_stats$covariate <- rownames(bal_stats) - # # Renames factor covariates - # bal_stats$covariate[sapply(strsplit(bal_stats$covariate, "_"), "[", 1) %in% factor_covariates] <- - # sapply(strsplit(bal_stats$covariate, "_"), "[", 1)[sapply(strsplit(bal_stats$covariate, "_"), "[", 1) %in% factor_covariates] - #averages across factor levels to create one bal stat per factor variable? data$ID <- as.numeric(data$ID) f_vars <- colnames(data)[sapply(data, is.factor)] diff --git a/R/compareHistories.R b/R/compareHistories.R index 09d60493..6628ec27 100644 --- a/R/compareHistories.R +++ b/R/compareHistories.R @@ -106,6 +106,9 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } @@ -114,20 +117,46 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } + else if(!is.character(exposure) | length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + if (missing(outcome)){ stop("Please supply a single outcome.", call. 
= FALSE) } + else if(!is.character(outcome) | length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } + if (missing(exposure_time_pts)){ stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } + else if(!is.numeric(exposure_time_pts)){ + stop("Please supply a list of exposure time points as integers.", call. = FALSE) + } if (missing(model)){ stop("Please supply a list of model output", call. = FALSE) } - if(!inherits(model, "list")){ + else if(!inherits(model, "list")){ stop("Please provide a list of model output from the fitModel function.", call. = FALSE) } + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) + } + + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) + } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) + } + + if(save.out){ histories_dir <- file.path(home_dir, "histories") diff --git a/R/createFormulas.R b/R/createFormulas.R index e7042cf8..23e2be1c 100644 --- a/R/createFormulas.R +++ b/R/createFormulas.R @@ -95,6 +95,9 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } @@ -103,20 +106,37 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } + else if(!is.character(exposure) | length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } + else if(!is.character(outcome) | length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } + if (missing(exposure_time_pts)){ stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } + else if(!is.numeric(exposure_time_pts)){ + stop("Please supply a list of exposure time points as integers.", call. = FALSE) + } + if (missing(tv_confounders)){ warning("You have not specified any time-varying confounders. If you have time-varying exposure, please list all wide exposure variables as tv_confounders.", call. = FALSE) tv_confounders <- character(0) } + else if(!is.character(tv_confounders)){ + stop("Please provide a list of time-varying confounders as character strings.") + } + if (missing(ti_confounders)){ stop("You have not specified time invariant confounders.", call. = FALSE) # ti_confounders <- NULL } + if (missing(type)){ stop("Please supply a 'full', 'short', or 'update' type", call. = FALSE) } @@ -135,6 +155,20 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, stop("Please provide a data frame of balance statistics from the assessBalance function.", call. = FALSE) } + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. 
= FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) + } + + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) + } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) + } + time_varying_covariates <- tv_confounders all_covars <- c(tv_confounders, ti_confounders) @@ -192,7 +226,11 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, else if (type == "update"){ if(is.null(bal_stats)){ - stop("Please provide balance statistics if you wish to run the update version of this function", call. = FALSE) + stop("Please provide balance statistics from the assessBalance() function if you wish to run the update version of this function", call. = FALSE) + } + else if (!is.data.frame(bal_stats)){ + stop("Please provide balance statistics from the assessBalance() function if you wish to run the update version of this function", call. = FALSE) + } if(verbose){ diff --git a/R/createWeights.R b/R/createWeights.R index 0a109c07..888fd3c9 100644 --- a/R/createWeights.R +++ b/R/createWeights.R @@ -5,10 +5,6 @@ #' relevant confounders. #' #' @export -#' @importFrom ggplot2 ggplot -#' @importFrom ggplot2 geom_histogram -#' @importFrom ggplot2 ggsave -#' @importFrom WeightIt weightitMSM #' @seealso {[WeightIt::weightitMSM()], #' } #' @param home_dir path to home directory (required if 'save.out' = TRUE) @@ -71,6 +67,9 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } @@ -80,34 +79,56 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } + else if (!mice::is.mids(data) & !is.data.frame(data) & !inherits(data, "list")) { + stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", + call. = FALSE) + } + if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } + else if(!is.character(exposure) | length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } + else if(!is.character(outcome) | length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } if (missing(formulas)){ stop("Please supply a list of balancing formulas.", call. = FALSE) } + else if(!inherits(formulas, "list")){ + stop("Please provide a list of formulas for each exposure time point", call. = FALSE) + } if(!inherits(method, "character")){ stop("Please provide as a character string a weights method from this list: 'ps', 'glm', 'gbm', 'bart', 'super', 'cbps'.", call. = FALSE) } - if(! method %in% c("ps", "glm", "gbm", "bart", "super", "cbps")){ + else if(! 
method %in% c("ps", "glm", "gbm", "bart", "super", "cbps")){ stop("Please provide a weights method from this list: 'ps', 'glm', 'gbm', 'bart', 'super', 'cbps'.", call. = FALSE) } - if (!mice::is.mids(data) & !is.data.frame(data) & !inherits(data, "list")) { - stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", - call. = FALSE) + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) } - if(!inherits(formulas, "list")){ - stop("Please provide a list of formulas for each exposure time point", call. = FALSE) + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) + } + + weights_method <- method form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[", 1) @@ -249,9 +270,9 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = if (verbose){ cat(paste0("For imputation", i, " and ", weights_method, - ", weighting method, the median weight value is ", round(median(fit$weights), 2) , - " (SD= ", round(sd(fit$weights), 2), "; range= ", round(min(fit$weights), 2), "-", - round(max(fit$weights), 2), ")."), "\n") + ", weighting method, the median weight value is ", round(median(fit$weights), 2) , + " (SD= ", round(sd(fit$weights), 2), "; range= ", round(min(fit$weights), 2), "-", + round(max(fit$weights), 2), ")."), "\n") cat('\n') } @@ -305,8 +326,8 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = if (verbose){ cat(paste0("For the ", weights_method, " weighting method, the median weight value is ", - round(median(data$weights), 2) , " (SD = ", round(sd(data$weights), 2), "; range = ", - round(min(data$weights), 2), "-", round(max(data$weights), 2), ")."), "\n") + round(median(data$weights), 2) , " (SD = ", round(sd(data$weights), 2), "; range = ", + round(min(data$weights), 2), "-", round(max(data$weights), 2), ")."), "\n") cat('\n') } diff --git a/R/fitModel.R b/R/fitModel.R index 27abfe9e..17f50b56 100644 --- a/R/fitModel.R +++ b/R/fitModel.R @@ -109,6 +109,9 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. = FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } @@ -118,27 +121,43 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } + else if (!mice::is.mids(data) & !is.data.frame(data) & !inherits(data, "list")) { + stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", call. = FALSE) + } + if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } + else if(!is.character(exposure) | length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. 
= FALSE) + } + if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } + else if(!is.character(outcome) | length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } + if (missing(weights)){ stop("Please supply a list of IPTW weights.", call. = FALSE) } + else if (!inherits(weights, "list")){ + stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + } + if (missing(exposure_time_pts)){ stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) } + else if(!is.numeric(exposure_time_pts)){ + stop("Please supply a list of exposure time points as integers.", call. = FALSE) + } + if (missing(model)){ stop('Please provide an outcome model selection "m" from 0-3 (e.g., "m1")', call. = FALSE) } - if (!mice::is.mids(data) & !is.data.frame(data) & !inherits(data, "list")) { - stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", call. = FALSE) - } - - if (!is.character(model)){ - stop('Please provide as a character string a valid model "m" from 0-3 (e.g., "m1")', call. = FALSE) + else if (!is.character(model) | length(model) != 1){ + stop('Please provide a single outcome model selection "m" from 0-3 (e.g., "m1")', call. = FALSE) } if (!(model %in% c("m0", "m1", "m2", "m3"))) { stop('Please provide a valid model "m" from 0-3 (e.g., "m1")', call. = FALSE) @@ -153,16 +172,38 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if(!inherits(family, "function")){ stop("Please provide a valid family in the form of a function (without quotations).", call. = FALSE) } + if(length(family) != 1){ + stop("Please provide a single valid family in the form of a function (without quotations).", call. = FALSE) + } + if(!inherits(link, "character")){ stop("Please provide as a character a valid link function.", call. = FALSE) } + else if(length(link) != 1){ + stop("Please provide as a character a valid link function.", call. = FALSE) + } + if (!is.null(covariates)){ + if(!is.character(covariates)){ + stop("Please provide a list of character strings for covariates.", call. = FALSE) + } if (sum(as.numeric(sapply(strsplit(covariates, "\\."), "[", 2)) > exposure_time_pts[1], na.rm = T) > 0){ warning("Please only include covariates that are time invariant or measured at the first exposure time point.") } } - if (!inherits(weights, "list")){ - stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) + } + + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) + } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) } diff --git a/R/trimWeights.R b/R/trimWeights.R index 79906e9a..d3465f35 100644 --- a/R/trimWeights.R +++ b/R/trimWeights.R @@ -63,25 +63,57 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v if (missing(home_dir)) { stop("Please supply a home directory.", call. = FALSE) } + else if(!is.character(home_dir)){ + stop("Please provide a valid home directory path as a string if you wish to save output locally.", call. 
= FALSE) + } else if(!dir.exists(home_dir)) { stop("Please provide a valid home directory path if you wish to save output locally.", call. = FALSE) } } + if (missing(exposure)){ + stop("Please supply a single exposure.", call. = FALSE) + } + else if(!is.character(exposure) | length(exposure) != 1){ + stop("Please supply a single exposure as a character.", call. = FALSE) + } + if (missing(weights)){ stop("Please supply a list of IPTW weights to trim.", call. = FALSE) } + else if (!inherits(weights, "list")){ + stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + } + + if (missing(outcome)){ + stop("Please supply a single outcome.", call. = FALSE) + } + else if(!is.character(outcome) | length(outcome) != 1){ + stop("Please supply a single outcome as a character.", call. = FALSE) + } + if(!is.numeric(quantile)){ - stop('Please sprovide a numeric quantile value between 0 and 1.', call. = FALSE) + stop('Please provide a numeric quantile value between 0 and 1.', call. = FALSE) } else if (quantile > 1 || quantile < 0) { stop('Please provide a quantile value between 0 and 1.', call. = FALSE) } - if (!inherits(weights, "list")){ - stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + if(!is.logical(verbose)){ + stop("Please set verbose to either TRUE or FALSE.", call. = FALSE) + } + else if(length(verbose) != 1){ + stop("Please provide a single TRUE or FALSE value to verbose.", call. = FALSE) } + if(!is.logical(save.out)){ + stop("Please set save.out to either TRUE or FALSE.", call. = FALSE) + } + else if(length(save.out) != 1){ + stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) + } + + if(save.out){ weights_dir <- file.path(home_dir, "weights") if (!dir.exists(weights_dir)) { From 2d0e2558a8cd78294730acbab2d3d327ba7522e3 Mon Sep 17 00:00:00 2001 From: "Isabella Stallworthy, PhD" Date: Wed, 27 Sep 2023 10:07:43 -0400 Subject: [PATCH 07/12] adding packages and incorporating noah's push --- .DS_Store | Bin 6148 -> 6148 bytes DESCRIPTION | 9 +- NAMESPACE | 32 ----- R/assessBalance.R | 277 +++++++++++++++++++------------------ R/calcBalStats.R | 178 ++++++++++++------------ R/compareHistories.R | 16 +-- R/createFormulas.R | 5 +- R/createWeights.R | 10 +- R/fitModel.R | 1 + R/make_love_plot.R | 6 +- R/trimWeights.R | 6 +- examplePipelineRevised.Rmd | 8 +- man/make_love_plot.Rd | 1 - 13 files changed, 266 insertions(+), 283 deletions(-) diff --git a/.DS_Store b/.DS_Store index 56e92df7eb62050b606b3449385cdc025432ca63..72a46ace362fbb3fe87eeb8c7771103387b8a97c 100644 GIT binary patch delta 428 zcmZuuF>As=7=3rw(x?RsibJ78e}KWiAeuo+G^8C|Pa{Q%CbXp#Iyls=OC@`ku0oNF z{aGD~Q+*dZXz0Or-#d8k-S_SmjYZ?FCXb3sU0d3hqO$hB#7C^8s3fQId0s*r4zAIH zj}AQCaPOhp38Ud4j0OPCSWTAwn=K;X93wnIV~Q9dv4=6yhB)qF;Q%yo+2a9#(>?7j69QhSgI$RvldL&XG_>D$|1xuv0&b2b`E|H cpvujH9N(EI^NTogFaQA~0|U$E2$40+0LQcuV*mgE diff --git a/DESCRIPTION b/DESCRIPTION index 51d1ff67..6334271b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,15 +22,13 @@ License: GPL-3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.2 +RoxygenNote: 7.2.3 Imports: WeightIt, marginaleffects, - tidyr, stats, ggplot2, matrixStats, - plyr, knitr, survey, jtools, @@ -38,7 +36,10 @@ Imports: stargazer, gtools, cobalt, - stringr + stringr, + huxtable, + officer, + flextable Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index b5f8964d..e5760d91 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,36 +17,4 @@ 
export(get_reference_values) export(make_love_plot) export(perform_multiple_comparison_correction) export(trimWeights) -importFrom(WeightIt,weightitMSM) -importFrom(dplyr,"%>%") -importFrom(dplyr,bind_rows) -importFrom(dplyr,filter) -importFrom(dplyr,mutate) -importFrom(dplyr,select) -importFrom(ggplot2,aes) -importFrom(ggplot2,element_blank) -importFrom(ggplot2,element_rect) -importFrom(ggplot2,element_text) -importFrom(ggplot2,geom_histogram) -importFrom(ggplot2,geom_point) -importFrom(ggplot2,geom_text) -importFrom(ggplot2,geom_vline) -importFrom(ggplot2,ggplot) -importFrom(ggplot2,ggsave) -importFrom(ggplot2,ggtitle) -importFrom(ggplot2,guide_axis) -importFrom(ggplot2,scale_y_discrete) -importFrom(ggplot2,theme) -importFrom(ggplot2,xlab) -importFrom(ggplot2,xlim) -importFrom(gtools,permutations) -importFrom(jtools,export_summs) -importFrom(knitr,kable) -importFrom(marginaleffects,avg_predictions) -importFrom(marginaleffects,hypotheses) -importFrom(mice,pool) -importFrom(stargazer,stargazer) -importFrom(stats,p.adjust) -importFrom(stringr,str_count) -importFrom(survey,svydesign) importFrom(survey,svyglm) diff --git a/R/assessBalance.R b/R/assessBalance.R index 2072373e..08855ec9 100644 --- a/R/assessBalance.R +++ b/R/assessBalance.R @@ -136,11 +136,11 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } - else if (!mice::is.mids(data) & !is.data.frame(data) & !inherits(data, "list")) { - stop("Please provide wide data as a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE) + else if (!inherits(data, "mids") && !is.data.frame(data) && + !(is.list(data) && all(vapply(data, is.data.frame, logical(1L))))) { + stop("Please provide either a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE) } - if (missing(exposure)){ stop("Please supply a single exposure.", call. 
= FALSE) } @@ -221,10 +221,13 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, } - folder <- ifelse(type == "prebalance", "prebalance/", "weighted/") + # folder <- ifelse(type == "prebalance", "prebalance/", "weighted/") + + mi <- !is.data.frame(data) + + folder <- switch(type, "prebalance" = "prebalance/", "weighted/") if (save.out){ - #creating required directories balance_dir <- file.path(home_dir, "balance") if (!dir.exists(balance_dir)) { dir.create(balance_dir) @@ -239,7 +242,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, } } - weights_method <- ifelse(type == "prebalance", "no weights", weights[[1]]$method) + weights_method <- switch(type, "prebalance" = "no weights", weights[[1]]$method) form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[",1) @@ -247,28 +250,29 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, ### Getting balance statistics if (type == "prebalance"){ - if (verbose){ - message(paste0("USER ALERT: The following statistics display covariate imbalance at each exposure time point prior to weighting, - using ", form_name, " formulas."), "\n") + if (verbose) { + message(sprintf("USER ALERT: The following statistics display covariate imbalance at each exposure time point prior to weighting, + using %s formulas.\n", + form_name)) cat("\n") } # Running balance stats function, unweighted, on each imputed dataset - if (mice::is.mids(data) | inherits(data, "list")){ + if (mi) { + + if (inherits(data, "mids")) { - if (mice::is.mids(data)){ - m = data$m + m <- data$m bal_stats <- lapply(seq_len(m), function(k) { d <- as.data.frame(mice::complete(data, k)) - if (length(which(is.na(d))) > 0){ + if (anyNA(d)){ stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).", call. = FALSE) } - exposure_type <- ifelse(inherits(d[, paste0(exposure, '.', - exposure_time_pts[1])], "numeric"), "continuous", "binary") + exposure_type <- if (is.numeric(d[, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" if (sum(duplicated(d$"ID")) > 0){ stop("Please provide wide imputed datasets with a single row per ID.", call. = FALSE) @@ -278,20 +282,18 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, balance_thresh, k = k, weights = NULL, imp_conf, verbose, save.out) }) } - - else if (inherits(data, "list")){ + else { m = length(data) bal_stats <- lapply(seq_len(m), function(k) { d <- data[[k]] - if (length(which(is.na(d))) > 0){ + if (anyNA(d)){ stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).", call. = FALSE) } - exposure_type <- ifelse(inherits(d[, paste0(exposure, '.', - exposure_time_pts[1])], "numeric"), "continuous", "binary") + exposure_type <- if (is.numeric(d[, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" if (sum(duplicated(d$"ID")) > 0){ stop("Please provide wide imputd datasets with a single row per ID.", call. 
= FALSE) @@ -304,12 +306,12 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, # Save out balance stats for each imputed dataset - bal_stats_all_imp <- do.call(bind_rows, bal_stats) + bal_stats_all_imp <- do.call(dplyr::bind_rows, bal_stats) bal_stats_all_imp <- bal_stats_all_imp[order(bal_stats_all_imp$covariate), ] if (save.out){ - write.csv(bal_stats_all_imp, paste0(home_dir, "/balance/", type, "/", - exposure, "_all_imps_balance_stat_summary.csv"), row.names = FALSE) + write.csv(bal_stats_all_imp, sprintf("%s/balance/%s/%s_all_imps_balance_stat_summary.csv", + home_dir, type, exposure)) if (verbose ){ cat("Pre balance statistics for each imputed dataset have now been saved in the 'balance/prebalance/' folder", "\n") } @@ -328,49 +330,41 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, #adds custom bal thresh info if (!is.null(imp_conf)){ - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, - # balance_thresh[1], balance_thresh[2])) + all_bal_stats$bal_thresh <- ifelse(all_bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]) - # all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - # all_bal_stats$bal_thresh, 1, 0) ) all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) } else{ - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh) all_bal_stats$bal_thresh <- balance_thresh - # - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - # all_bal_stats$bal_thresh, 1, 0) ) + all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) } - cat("\n") if (verbose){ + cat("\n") + cat(paste0("*** Averaging Across All Imputations ***"), "\n") } tot_covars <- sapply(strsplit(all_bal_stats$covariate, "\\."), `[`, 1) } + else { - else if (is.data.frame(data)){ - - if(!inherits(data, "data.frame")){ - stop("Please provide data as a data frame, mids object, or list.", call. = FALSE) - } - if (sum(duplicated(data$"ID")) > 0){ + if (sum(duplicated(data[["ID"]])) > 0){ stop("Please provide wide dataset with a single row per ID.", call. = FALSE) } - if (length(which(is.na(data))) > 0){ + + if (anyNA(data)) { stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).", call. 
= FALSE) } - exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), "continuous", "binary") + exposure_type <- if (is.numeric(data[, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" bal_stats <- calcBalStats(home_dir, data, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = 0, weights = NULL, imp_conf, verbose, save.out) @@ -393,21 +387,21 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, # Weighted else if (type == "weighted"){ - if(verbose){ - message(paste0("USER ALERT: The following statistics display covariate imbalance at each exposure time point following IPTW weighting, - using ", form_name, " formulas."), "\n") + if (verbose) { + message(sprintf("USER ALERT: The following statistics display covariate imbalance at each exposure time point following IPTW weighting, + using %s formulas.\n", form_name)) cat("\n") } - if (mice::is.mids(data) | inherits(data, "list")){ + if (mi) { # Running balance stats function, unweighted, on each imputed dataset w/ no weights - if (mice::is.mids(data)){ + if (inherits(data, "mids")) { m <- data$m bal_stats <- lapply(seq_len(m), function(k) { d <- as.data.frame(mice::complete(data, k)) - if (length(which(is.na(d))) > 0){ + if (anyNA(d)){ stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).", call. = FALSE) } @@ -421,18 +415,17 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, balance_thresh, k = k, weights = w, imp_conf, verbose, save.out) }) } + else { + m <- length(data) - - else if (inherits(data, "list")){ - m = length(data) bal_stats <- lapply(seq_len(m), function(k) { d <- data[[k]] - if (length(which(is.na(d))) > 0){ + if (anyNA(d)){ stop("This code requires complete data. Consider imputation if missingness < 20% and is reasonably Missing at Random (MAR).", call. = FALSE) } - if (sum(duplicated(d$"ID")) > 0){ + if (sum(duplicated(d[["ID"]])) > 0){ stop("Please provide wide imputed datasets with a single row per ID.", call. 
= FALSE) } @@ -445,19 +438,18 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, # Save out balance stats for each imputed dataset - bal_stats_all_imp <- do.call(bind_rows, bal_stats) + bal_stats_all_imp <- do.call(dplyr::bind_rows, bal_stats) bal_stats_all_imp <- bal_stats_all_imp[order(bal_stats_all_imp$covariate), ] if (save.out){ - write.csv(bal_stats_all_imp, paste0(home_dir, "/balance/", type, "/", exposure, - "_all_imps_balance_stat_summary.csv"), row.names = FALSE) + write.csv(bal_stats_all_imp, sprintf("%s/balance/%s/%s_all_imps_balance_stat_summary.csv", + home_dir, type, exposure)) if (verbose){ cat("Weighted balance statistics for each imputed dataset have now been saved in the 'balance/weighted/' folder", "\n") } } - # Gathering imbalanced covariate statistics to average across imputed datasets for the final list/assessment of imbalanced covariates # Averaging across imputed datasets all_bal_stats <- data.frame( @@ -469,22 +461,16 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, # #adds custom bal thresh info if (!is.null(imp_conf)){ - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = ifelse(all_bal_stats$covariate %in% imp_conf, - # balance_thresh[1], balance_thresh[2])) + all_bal_stats$bal_thresh <- ifelse(all_bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]) - # all_bal_stats <- all_bal_stats %>%dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - # all_bal_stats$bal_thresh, 1, 0) ) all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) } else{ - # all_bal_stats <- all_bal_stats %>% dplyr::mutate(bal_thresh = balance_thresh) all_bal_stats$bal_thresh <- balance_thresh - # all_bal_stats <- all_bal_stats %>% dplyr:: mutate(balanced = ifelse(abs(all_bal_stats$avg_bal) < - # all_bal_stats$bal_thresh, 1, 0) ) all_bal_stats$balanced <- ifelse(abs(all_bal_stats$avg_bal) < all_bal_stats$bal_thresh, 1, 0) } @@ -496,9 +482,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, tot_covars <- sapply(strsplit(all_bal_stats$covariate, "\\."), `[`, 1) } - - - if (is.data.frame(data)){ + else { if (sum(duplicated(data$"ID")) > 0){ stop("Please provide wide dataset with a single row per ID.", call.
= FALSE) @@ -528,26 +512,25 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, tot_cons <- tot_covars[!duplicated(tot_covars)] # Total domains/constructs # Make love plot to summarize imbalance at each exposure time point - data_type <- ifelse(mice::is.mids(data) | inherits(data, "list"), "imputed", "single") + data_type <- if (mi) "imputed" else "single" - weights_method = ifelse(type == "weighted", weights[[1]]$method, "no weights") + weights_method <- if (type == "weighted") weights[[1]]$method else "no weights" sapply(seq_along(exposure_time_pts), function(i) { exposure_time_pt <- exposure_time_pts[i] - if (mice::is.mids(data)){ - exposure_type <- ifelse(inherits(mice::complete(data, 1)[, paste0(exposure, '.', - exposure_time_pts[1])], "numeric"), "continuous", "binary") + if (inherits(data, "mids")) { + exposure_type <- if (is.numeric(mice::complete(data, 1)[, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" + } - else if (inherits(data, "list")){ - exposure_type <- ifelse(inherits(data[[1]][, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), "continuous", "binary") + else if (!is.data.frame(data)) { + exposure_type <- if (is.numeric(data[[1]][, paste0(exposure, '.', exposure_time_pts[1])])) "continuous" else "binary" } - else if (is.data.frame(data)){ - exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), "continuous", "binary") + else { + exposure_type <- if (is.numeric(data[[paste0(exposure, '.', exposure_time_pts[1])]])) "continuous" else "binary" } - # temp <- all_bal_stats %>% filter(exp_time == exposure_time_pt) - temp <- all_bal_stats[all_bal_stats$exp_time == exposure_time_pt, ] + temp <- all_bal_stats[all_bal_stats$exp_time == exposure_time_pt, , drop = FALSE] make_love_plot(home_dir, folder, exposure, exposure_time_pt, exposure_type, k = 0, form_name, temp, data_type, balance_thresh, weights_method, imp_conf, verbose, save.out) @@ -555,22 +538,23 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (save.out){ + outfile <- sprintf("%s/balance/%s/%s-%s_all_%s_%s_associations.html", + home_dir, type, exposure, outcome, type, weights_method) + # Save out all correlations/std mean differences - sink(paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_all_", type, "_", weights_method, "_associations.html")) + sink(outfile) stargazer::stargazer(all_bal_stats, type = "html", digits = 2, column.labels = colnames(all_bal_stats), summary = FALSE, - rownames = FALSE, header = FALSE, out = paste0(home_dir, "/balance/", type, "/", exposure, "-", - outcome, "_all_", type,"_", weights_method, "_associations.html")) + rownames = FALSE, header = FALSE, out = outfile) sink() if (verbose){ - if (mice::is.mids(data) | inherits(data, "list")){ - cat(paste0("Summary plots for ", form_name, " ", exposure, - " averaged across all imputations have been saved out for each time point in the 'balance/", - type, "/plots/' folder."), "\n") + if (mi){ + cat(sprintf("Summary plots for %s %s averaged across all imputations have been saved out for each time point in the 'balance/%s/plots/' folder.\n", + form_name, exposure, type)) } - else{ - cat(paste0("Summary plots for ", form_name, " ", exposure, - " have now been saved out for each time point in the 'balance/", type, "/plots/' folder."), "\n") + else { + cat(sprintf("Summary plots for %s %s have now been saved out for each time point in the 'balance/%s/plots/' folder.\n", + form_name, exposure, 
type)) } } } @@ -578,76 +562,96 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (save.out){ # Saving out all pre-balance associations - write.csv(all_bal_stats, paste0(home_dir, "/balance/", type, "/", exposure, "_", type, - "_", weights_method, "_stat_summary.csv"), row.names = FALSE) + write.csv(all_bal_stats, sprintf("%s/balance/%s/%s_%s_%s_stat_summary.csv", + home_dir, type, exposure, type, weights_method), + row.names = FALSE) if (verbose){ - if (mice::is.mids(data) | inherits(data, "list")){ - cat(paste0("Check 'balance/", type, "/' folder for a table of all ", type, " correlations or - standardized mean differences averaged across imputed datasets."), "\n") + if (mi) { + cat(sprintf("Check 'balance/%s/' folder for a table of all %s correlations or + standardized mean differences averaged across imputed datasets.\n", + type, type)) } else { - cat(paste0("Check 'balance/", type, "/' folder for a table of all ", type, " correlations or - standardized mean differences."), "\n") + cat(sprintf("Check 'balance/%s/' folder for a table of all %s correlations or + standardized mean differences.\n", + type, type)) } } } # Finding all imbalanced variables - # unbalanced_covars <- all_bal_stats %>% - # filter(balanced == 0) - unbalanced_covars <- all_bal_stats[all_bal_stats$balanced == 0, ] + unbalanced_covars <- all_bal_stats[all_bal_stats$balanced == 0, , drop = FALSE] unbalanced_constructs <- sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1)[!duplicated(sapply(strsplit(unbalanced_covars$covariate, "\\."), "[", 1))] if (verbose){ - if (mice::is.mids(data) | inherits(data, "list")){ - cat(paste0("USER ALERT: Averaging across all imputed datasets for exposure ", exposure, " using the ", - form_name, " formulas and ", weights_method, " :"), "\n") - - cat(paste0("The median absolute value relation between exposure and confounder is ", - round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ", - round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n") - - if (nrow(unbalanced_covars) > 0){ - cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ", - length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", - length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), - "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", - exposure, " of ", round(median(abs(as.numeric(unlist( - # unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% - # dplyr:: select(avg_bal) - - unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"] - )))), 2), " (range=", - round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") + if (mi) { + cat(sprintf("USER ALERT: Averaging across all imputed datasets for exposure %s using the %s formulas and %s :\n", + exposure, form_name, weights_method)) + + cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s-%s).\n", + round(median(abs(all_bal_stats$avg_bal)), 2), + round(min(all_bal_stats$avg_bal), 2), + round(max(all_bal_stats$avg_bal), 2))) + + if (nrow(unbalanced_covars) > 0) { + cat(sprintf("As shown below, the following %s covariates across time points out of %s total (%s%%) spanning %s domains out of %s (%s%%) are imbalanced with a
remaining median absolute value correlation/std mean difference in relation to %s of %s (range=%s-%s) :\n", + nrow(unbalanced_covars), + length(tot_covars), + round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), + length(unbalanced_constructs), + length(tot_cons), + round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), + exposure, + round(median(abs(as.numeric(unbalanced_covars$avg_bal[unbalanced_covars$balanced == 0]))), 2), + round(min(unbalanced_covars$avg_bal), 2), + round(max(unbalanced_covars$avg_bal), 2))) } else{ cat("There are no imbalanced covariates.", "\n") } } else { - cat(paste0("USER ALERT: For exposure ", exposure, " using the ", form_name," formulas and ", weights_method, " :"), "\n") - - cat(paste0("The median absolute value relation between exposure and confounder is ", - round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ", - round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n") + # cat(paste0("USER ALERT: For exposure ", exposure, " using the ", form_name," formulas and ", weights_method, " :"), "\n") + cat(sprintf("USER ALERT: For exposure %s using the %s formulas and %s :\n", + exposure, form_name, weights_method)) + + # cat(paste0("The median absolute value relation between exposure and confounder is ", + # round(median(abs(all_bal_stats$avg_bal)), 2), " (range = ", + # round(min(all_bal_stats$avg_ba), 2), "-", round(max(all_bal_stats$avg_ba), 2), ")."), "\n") + cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s-%s). \n", + round(median(abs(all_bal_stats$avg_bal)), 2), + round(min(all_bal_stats$avg_bal), 2), + round(max(all_bal_stats$avg_bal), 2))) if (nrow(unbalanced_covars) > 0){ - cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ", - length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", - length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), - "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", - exposure, " of ", round(median(abs(as.numeric(unlist( - # unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% - # dplyr:: select(avg_bal) - unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"] - - )))), 2), " (range=", - round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") + # cat(paste0("As shown below, the following ", nrow(unbalanced_covars), " covariates across time points out of ", + # length(tot_covars), " total (", round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), "%) spanning ", + # length(unbalanced_constructs), " domains out of ", length(tot_cons), " (", round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), + # "%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to ", + # exposure, " of ", round(median(abs(as.numeric(unlist( + # # unbalanced_covars %>% dplyr:: filter(unbalanced_covars$balanced == 0) %>% + # # dplyr:: select(avg_bal) + # unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"] + # + # )))), 2), " (range=", + # round(min(unbalanced_covars$avg_bal), 2), "-", round(max(unbalanced_covars$avg_bal), 2), ") : "), "\n") + + cat(sprintf("As shown below, the following %s covariates across time points out of %s total (%s%%)
spanning %s domains out of %s (%s%%) are imbalanced with a remaining median absolute value correlation/std mean difference in relation to %s of %s (range=%s-%s) :\n", + nrow(unbalanced_covars), + length(tot_covars), + round(nrow(unbalanced_covars) / length(tot_covars) * 100, 2), + length(unbalanced_constructs), + length(tot_cons), + round(length(unbalanced_constructs) / length(tot_cons) * 100, 2), + exposure, + round(median(abs(as.numeric(unlist(unbalanced_covars[unbalanced_covars$balanced == 0, ][, "avg_bal"])))), 2), + round(min(unbalanced_covars$avg_bal), 2), + round(max(unbalanced_covars$avg_bal), 2))) } else{ cat("There are no imbalanced covariates.", "\n") } @@ -663,9 +667,12 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, } if (save.out){ # Save out only imbalanced covariates - sink(paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_",type,"_", weights_method, "_all_covariates_imbalanced.html")) - stargazer(unbalanced_covars, type = "html", digits = 2, column.labels = colnames(unbalanced_covars), summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/balance/", type, "/", exposure, "-", outcome, "_", type, "_", weights_method, "_all_covariates_imbalanced.html")) + outfile <- sprintf("%s/balance/%s/%s-%s_%s_%s_all_covariates_imbalanced.html", + home_dir, type, exposure, outcome, type, weights_method) + + sink(outfile) + stargazer::stargazer(unbalanced_covars, type = "html", digits = 2, column.labels = colnames(unbalanced_covars), summary = FALSE, rownames = FALSE, header = FALSE, + out = outfile) sink() } } diff --git a/R/calcBalStats.R b/R/calcBalStats.R index d65c0268..3b9ac797 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -102,9 +102,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ } - folder <- ifelse(weighted == 0, "prebalance/", "weighted/") + folder <- if (weighted) "weighted/" else "prebalance/" - data_type <- ifelse(k == 0, "single", "imputed") + data_type <- if (k == 0) "single" else "imputed" if (data_type == "imputed" & verbose){ cat(paste0("**Imputation ", k, "**"), "\n") @@ -151,7 +151,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # GETTING BALANCE STATS FOR T=1 W/ NO EXPOSURE HISTORY (ok to standardize immediately) if (length(lagged_time_pts) == 0) { - # temp <- data temp <- data2 # Unweighted pre-balance checking @@ -260,7 +259,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (length(lagged_time_pts) > 0) { # Merge data with prop_weights by ID - # temp <- merge(data, prop_weights, by = "ID", all.x = TRUE) temp <- merge(data2, prop_weights, by = "ID", all.x = TRUE) #data with factors split up @@ -279,20 +277,21 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ " at time point ", exposure_time_pt)) } - temp <- temp[!temp$history %in% ommitted_histories, ] + temp <- temp[!temp$history %in% ommitted_histories, , drop = FALSE] } #ends hist exc # Unweighted pre-balance checking if (weighted == 0) { # no IPTW weighting but weighting on history if (exposure_type == "continuous") { bal_stats <- sapply(sort(unique(temp$history)), function(i) { # finding balance by history - # temp2 <- temp %>% - # dplyr::filter(history == i) - temp2 <- temp[temp$history == i, ] - #should be same length as covars (already have factors split up) - cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding 
covariance - subset = temp2$history[temp2$history == i] == i) # subsetting by that history + temp2 <- temp[temp$history == i, , drop = FALSE ] + + cobalt::col_w_cov(temp2[covars], temp2[[paste0(exposure, ".", exposure_time_pt)]], std = FALSE) + + # #should be same length as covars (already have factors split up) + # cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance + # subset = temp2$history[temp2$history == i] == i) # subsetting by that history }) # getting weighted mean across histories (cols), weighting by proportion of those w/ that same history @@ -302,7 +301,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) - # bal_stats <- bal_stats %>% bal_stats$std_bal_stats <- weighted_bal_stats / (sapply(seq(nrow(bal_stats)), function(x) { #issue: looking in data for unweighted vals but factors have additional vars sd(as.numeric(data2[, rownames(bal_stats)[x]]), na.rm = TRUE) }) *# unweighted covar sd @@ -316,8 +314,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ call. = FALSE) } - # bal_stats <- bal_stats %>% - # dplyr::select(contains(c("std"))) bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) @@ -325,9 +321,8 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ else if (exposure_type == "binary") { bal_stats <- sapply(sort(unique(temp$history)), function(i) { - # temp2 <- temp %>% - # dplyr::filter(history == i) - temp2 <- temp[temp$history == i, ] + + temp2 <- temp[temp$history == i, , drop = FALSE ] cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference subset = temp2$history[temp2$history == i] == i) # subsetting by that history @@ -341,7 +336,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after finding weighted balance stats - # bal_stats <- bal_stats %>% bal_stats$std_bal_stats <- weighted_bal_stats/ sapply(seq(ncol(data[, covars])), function(x){ sqrt(mean( #dividing by pool SD estimate (unadjusted) @@ -361,9 +355,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 - - # bal_stats <- bal_stats %>% - # dplyr::select(contains(c("std"))) bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) } #ends binary @@ -375,10 +366,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (exposure_type == "continuous") { # finds balance for each covariate clustered/subset by history - bal_stats <- sapply(sort(unique(temp$history)), function(i) { - # temp2 <- temp %>% - # dplyr::filter(history == i) - temp2 <- temp[temp$history == i, ] + bal_stats <- sapply(sort(unique(temp$history), method = "radix"), function(i) { + + temp2 <- temp[temp$history == i, , drop = FALSE ] cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance subset = temp2$history[temp2$history == i] == i, # subsetting by that history @@ -417,10 +407,9 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ else if (exposure_type == "binary") { # 
finds balance for each covariate clustered/subset by history - bal_stats <- sapply(sort(unique(temp$history)), function(i) { - # temp2 <- temp %>% - # dplyr::filter(history == i) - temp2 <- temp[temp$history == i, ] + bal_stats <- sapply(sort(unique(temp$history), method = "radix"), function(i) { + + temp2 <- temp[temp$history == i, , drop = FALSE ] cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference subset = temp2$history[temp2$history == i] == i, # subsetting by that history @@ -441,7 +430,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ bal_stats <- as.data.frame(cbind(bal_stats, weighted_bal_stats)) # standardizing balance statistics after finding weighted balance stats - # bal_stats <- bal_stats %>% bal_stats$std_bal_stats <- weighted_bal_stats/ sapply(seq(ncol(data[, covars])), function(x){ sqrt(mean( #dividing by pool SD estimate (unadjusted) @@ -458,11 +446,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # For a weighted_bal_stat of 0, make std stat also 0 so as not to throw an error bal_stats$std_bal_stats[is.nan(bal_stats$std_bal_stats)] <- 0 - - # bal_stats <- bal_stats %>% - # dplyr::select(contains(c("std"))) bal_stats <- subset(bal_stats, select = grepl("std", colnames(bal_stats) )) - } #ends binary } #ends weighted @@ -479,40 +463,31 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ #averages across factor levels to create one bal stat per factor variable? data$ID <- as.numeric(data$ID) f_vars <- colnames(data)[sapply(data, is.factor)] - f_stats <- bal_stats[rownames(bal_stats)[sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ] - f_stats$name <- sapply(strsplit(rownames(f_stats), "_"), "[", 1) - # test <- f_stats %>% dplyr::group_by(name) %>% dplyr::summarize(new = mean(std_bal_stats)) - test <- aggregate(std_bal_stats ~ name, data = f_stats, - FUN = function(x) new = mean(x)) - - colnames(test) <- c("covariate", "std_bal_stats") - new <- data.frame(std_bal_stats = test$std_bal_stats, - covariate = test$covariate) - rownames(new) <- new$covariate - bal_stats <- rbind(bal_stats[rownames(bal_stats)[!sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ], - new) + + if(length(f_vars) > 0){ + f_stats <- bal_stats[rownames(bal_stats)[sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ] + f_stats$name <- sapply(strsplit(rownames(f_stats), "_"), "[", 1) + test <- aggregate(std_bal_stats ~ name, data = f_stats, + FUN = function(x) new = mean(x)) + + colnames(test) <- c("covariate", "std_bal_stats") + new <- data.frame(std_bal_stats = test$std_bal_stats, + covariate = test$covariate) + rownames(new) <- new$covariate + bal_stats <- rbind(bal_stats[rownames(bal_stats)[!sapply(strsplit(rownames(bal_stats), "_"), "[", 1) %in% f_vars], ], + new) + } #adds custom bal thresh info if (!is.null(imp_conf)){ - # bal_stats <- bal_stats %>% - # dplyr::mutate(bal_thresh = ifelse(bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2])) bal_stats$bal_thresh <- ifelse(bal_stats$covariate %in% imp_conf, balance_thresh[1], balance_thresh[2]) - # bal_stats <- bal_stats %>% - # dplyr:: mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) ) bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) } else{ - # bal_stats <- bal_stats %>% - # dplyr::mutate(bal_thresh = balance_thresh) bal_stats$bal_thresh <-
balance_thresh - # bal_stats <- bal_stats %>% - # dplyr:: mutate(balanced = ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) ) bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0) } - # bal_stats <- bal_stats %>% - # dplyr::mutate(exposure = exposure, exp_time = exposure_time_pt) %>% - # dplyr::mutate(covar_time = sapply(strsplit(covariate, "\\."), "[", 2)) bal_stats$exposure <- exposure bal_stats$exp_time <- exposure_time_pt bal_stats$covar_time <- sapply(strsplit(bal_stats$covariate, "\\."), "[", 2) @@ -541,18 +516,12 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # Summarizing balance - # bal_summary_exp <- all_bal_stats %>% - # dplyr::group_by(exp_time) %>% - # dplyr::summarize(balanced_n = sum(balanced == 1), # Tallying balanced covariates - # imbalanced_n = sum(balanced == 0), # Tallying imbalanced covariates - # n = dplyr::n()) - - bal_summary_exp <- as.data.frame(aggregate(balanced ~ exp_time, data = all_bal_stats, - FUN = function(x) c(balanced_n = sum(x == 1), - imbalanced_n = sum(x == 0), - n = length(x)) )) - bal_summary_exp <- do.call(data.frame, bal_summary_exp) - colnames(bal_summary_exp) <- c("exp_time", "balanced_n", "imbalanced_n", "n") + bal_summary_exp <- as.data.frame(aggregate(balanced ~ exp_time, data = all_bal_stats, + FUN = function(x) c(balanced_n = sum(x == 1), + imbalanced_n = sum(x == 0), + n = length(x)) )) + bal_summary_exp <- do.call(data.frame, bal_summary_exp) + colnames(bal_summary_exp) <- c("exp_time", "balanced_n", "imbalanced_n", "n") if (save.out){ @@ -605,29 +574,66 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (verbose){ if (data_type == "imputed"){ - cat(paste0("USER ALERT: For exposure ", exposure, " imputation ", k, " using ", weights_method, " and ", form_name, " formulas: "), "\n") - cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$std_bal_stats)), 2), " (range = ", - round(min(all_bal_stats$std_bal_stats), 2), "-", round(max(all_bal_stats$std_bal_stats), 2), ")."), "\n") + # cat(paste0("USER ALERT: For exposure ", exposure, " imputation ", k, " using ", weights_method, " and ", form_name, " formulas: "), "\n") + cat(sprintf("USER ALERT: For exposure %s imputation %s using %s and %s formulas: \n", + exposure, k, weights_method, form_name)) + + # cat(paste0("The median absolute value relation between exposure and confounder is ", round(median(abs(all_bal_stats$std_bal_stats)), 2), " (range = ", + # round(min(all_bal_stats$std_bal_stats), 2), "-", round(max(all_bal_stats$std_bal_stats), 2), ")."), "\n") + cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s - %s).\n", + round(median(abs(all_bal_stats$std_bal_stats)), 2), + round(min(all_bal_stats$std_bal_stats), 2), + round(max(all_bal_stats$std_bal_stats), 2))) + + # cat(paste0("As shown below, ", imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, + # "%) covariates across time points, corresponding to ", + # remaining_imbalanced_domains, " out of ", total_domains, + # " domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of ", + # remaining_avg_abs_corr, " (range= ", remaining_corr_range, "):"), "\n") + + cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %sout of %s domains, + remain imbalanced with a remaining median absolute value 
correlation/std mean difference of %s (range= %s):\n", + imbalanced_covars, + total_covars, + percentage_imbalanced, + remaining_imbalanced_domains, + total_domains, + remaining_avg_abs_corr, + remaining_corr_range)) - cat(paste0("As shown below, ", imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, - "%) covariates across time points, corresponding to ", - remaining_imbalanced_domains, " out of ", total_domains, - " domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of ", - remaining_avg_abs_corr, " (range= ", remaining_corr_range, "):"), "\n") cat("\n") - cat(knitr::kable(bal_summary_exp, caption = paste0("Imbalanced Covariates for imputation ", k, " using ", - weights_method, " and ", form_name, " formulas"), format = 'pipe'), sep = "\n") + cat(knitr::kable(bal_summary_exp, caption = + # paste0("Imbalanced Covariates for imputation ", k, " using ", + # weights_method, " and ", form_name, " formulas"), + sprintf("Imbalanced Covariates for imputation %s using %s and %s formulas", + k, weights_method, form_name), + format = 'pipe'), sep = "\n") + cat("\n") cat("\n") } else { - cat(paste0("As shown below, for exposure ", exposure, " using ", weights_method, ", and ", form_name, " formulas, ", - imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, "%) covariates across time points corresponding to ", - remaining_imbalanced_domains, " out of ", total_domains, - " domains remain imbalanced with a remaining average absolute value correlation/std mean difference of ", - remaining_avg_abs_corr, " (range= ", remaining_corr_range, ") :"), "\n") + # cat(paste0("As shown below, for exposure ", exposure, " using ", weights_method, ", and ", form_name, " formulas, ", + # imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, "%) covariates across time points corresponding to ", + # remaining_imbalanced_domains, " out of ", total_domains, + # " domains remain imbalanced with a remaining average absolute value correlation/std mean difference of ", + # remaining_avg_abs_corr, " (range= ", remaining_corr_range, ") :"), "\n") + + cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %sout of %s domains, + remain imbalanced with a remaining median absolute value correlation/std mean difference of %s (range= %s):\n", + imbalanced_covars, + total_covars, + percentage_imbalanced, + remaining_imbalanced_domains, + total_domains, + remaining_avg_abs_corr, + remaining_corr_range)) + cat("\n") - cat(knitr::kable(bal_summary_exp, caption = paste0("Imbalanced covariates using ", - weights_method, " and ", form_name, " formulas"), format = 'pipe'), sep = "\n") + cat(knitr::kable(bal_summary_exp, caption = + # paste0("Imbalanced covariates using ", + # weights_method, " and ", form_name, " formulas"), + sprintf("Imbalanced covariates using %s and %s formulas", weights_method, form_name), + format = 'pipe'), sep = "\n") cat("\n") cat("\n") } diff --git a/R/compareHistories.R b/R/compareHistories.R index 6628ec27..3767d4fe 100644 --- a/R/compareHistories.R +++ b/R/compareHistories.R @@ -618,18 +618,18 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod comparisons <- comparisons[order(comparisons$dose), ] if (length(colors) > 1) { # If user input a list of colors - p <- ggplot2::ggplot(data = comparisons, aes(x = estimate, y = history, color = dose)) + + p <- ggplot2::ggplot(data = comparisons, ggplot2::aes(x = estimate, y = history, 
color = dose)) + ggplot2::geom_point(size = 5) + ggplot2::scale_color_manual(values = colors) + ggplot2::scale_y_discrete(limits = c(as.character(comparisons$history)), expand = c(0, 0.2)) + - ggplot2::geom_errorbarh(aes(xmin = low_ci, xmax = high_ci), height = 0.6) + + ggplot2::geom_errorbarh(ggplot2::aes(xmin = low_ci, xmax = high_ci), height = 0.6) + ggplot2::xlab(paste0("Predicted ", out_lab, " Value")) + ggplot2::ylab(paste0(exp_lab, " Exposure History")) + ggplot2::xlim(min(comparisons$low_ci) - 1 * sd(comparisons$low_ci), max(comparisons$high_ci) + 1 * sd(comparisons$high_ci)) + ggplot2::theme(text = ggplot2::element_text(size = 14)) + - ggplot2::theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), - panel.background = element_blank(), axis.line = ggplot2::element_line(colour = "black")) + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), axis.line = ggplot2::element_line(colour = "black")) if (save.out){ ggplot2::ggsave(paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), plot = p) @@ -639,18 +639,18 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod } } else { # If user lists a palette (default) - p <- ggplot2::ggplot(data = comparisons, aes(x = estimate, y = history, color = dose)) + + p <- ggplot2::ggplot(data = comparisons, ggplot2::aes(x = estimate, y = history, color = dose)) + ggplot2::geom_point(size = 5) + ggplot2::scale_colour_brewer(palette = colors) + ggplot2::scale_y_discrete(limits = c(as.character(comparisons$history)), expand = c(0, 0.2)) + - ggplot2::geom_errorbarh(aes(xmin = low_ci, xmax = high_ci), height = 0.6) + + ggplot2::geom_errorbarh(ggplot2::aes(xmin = low_ci, xmax = high_ci), height = 0.6) + ggplot2::xlab(paste0("Predicted ", out_lab, " Value")) + ggplot2::ylab(paste0(exp_lab, " Exposure History")) + ggplot2::xlim(min(comparisons$low_ci) - 1 * sd(comparisons$low_ci), max(comparisons$high_ci) + 1 * sd(comparisons$high_ci)) + ggplot2::theme(text = ggplot2::element_text(size = 14)) + - ggplot2::theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), - panel.background = element_blank(), axis.line = ggplot2::element_line(colour = "black")) + + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), axis.line = ggplot2::element_line(colour = "black")) + ggplot2::guides(fill = ggplot2::guide_legend(title="Dosage")) if (save.out){ diff --git a/R/createFormulas.R b/R/createFormulas.R index 23e2be1c..9e0210dc 100644 --- a/R/createFormulas.R +++ b/R/createFormulas.R @@ -247,15 +247,16 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, # as.numeric(covar_time) > 0) %>% # Finds any lagged imbalanced covars # dplyr::select(covariate) - new <- bal_stats[bal_stats$balanced ==0 & + new <- bal_stats[bal_stats$balanced == 0 & bal_stats$exp_time == time & as.numeric(bal_stats$covar_time) < exposure_time_pts[x-1] & as.numeric(bal_stats$covar_time) > 0, ] - new <- new[, "covariate"] if (nrow(new) > 0) { + new <- new[, "covariate"] + new <- as.character(unlist(new)) time_var_include <- c(time_var_include, new) diff --git a/R/createWeights.R b/R/createWeights.R index 888fd3c9..d2f549a9 100644 --- a/R/createWeights.R +++ b/R/createWeights.R @@ -172,7 +172,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = calculate_weights <- function(data,
form, weights_method, SL.library) { if(weights_method == "super"){ - fit <- weightitMSM(form, + fit <- WeightIt::weightitMSM(form, data = data, method = weights_method, stabilize = TRUE, @@ -183,7 +183,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = over = FALSE) } else{ - fit <- weightitMSM(form, + fit <- WeightIt::weightitMSM(form, data = data, method = weights_method, stabilize = TRUE, @@ -227,7 +227,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } # Writes image of the histogram of weights to assess heavy tails - p <- ggplot2::ggplot(data = as.data.frame(fit$weight), aes(x = fit$weight)) + + p <- ggplot2::ggplot(data = as.data.frame(fit$weight), ggplot2::aes(x = fit$weight)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) @@ -283,7 +283,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } # Writes image of the histogram of weights to assess heavy tails - p <- ggplot2::ggplot(data = as.data.frame(fit$weight), aes(x = fit$weight)) + + p <- ggplot2::ggplot(data = as.data.frame(fit$weight), ggplot2::aes(x = fit$weight)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) @@ -340,7 +340,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } # Writes image of the histogram of weights to assess heavy tails - p <- ggplot2::ggplot(data = as.data.frame(weights[[1]]$weights), aes(x = weights[[1]]$weights)) + + p <- ggplot2::ggplot(data = as.data.frame(weights[[1]]$weights), ggplot2::aes(x = weights[[1]]$weights)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) diff --git a/R/fitModel.R b/R/fitModel.R index 17f50b56..6340a7b9 100644 --- a/R/fitModel.R +++ b/R/fitModel.R @@ -5,6 +5,7 @@ #' weights. 
#' @seealso {[survey::svyglm()] for more on family/link specifications, #' } +#' @importFrom survey svyglm #' @param home_dir path to home directory (required if 'save.out' = TRUE) #' @param data data in wide format as: a data frame, list of imputed data #' frames, or mids object diff --git a/R/make_love_plot.R b/R/make_love_plot.R index d83bb653..ccb0af08 100644 --- a/R/make_love_plot.R +++ b/R/make_love_plot.R @@ -105,9 +105,9 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur } # Make love plot per exposure time point - lp <- ggplot2::ggplot(aes(x = avg_bal, y = reorder(as.factor(covariate), avg_bal)), data = balance_stats) + - ggplot2::geom_point(aes(y = reorder(as.factor(covariate), avg_bal), x = avg_bal, fill = "white", alpha = 1)) + - ggplot2::geom_text(aes(label = labels, hjust = -0.2, vjust = 0.2), size = 1.5, color = "red") + + lp <- ggplot2::ggplot(ggplot2::aes(x = avg_bal, y = reorder(as.factor(covariate), avg_bal)), data = balance_stats) + + ggplot2::geom_point(ggplot2::aes(y = reorder(as.factor(covariate), avg_bal), x = avg_bal, fill = "white", alpha = 1)) + + ggplot2::geom_text(ggplot2::aes(label = labels, hjust = -0.2, vjust = 0.2), size = 1.5, color = "red") + ggplot2::xlab(x_lab) + ggplot2::ylab("Covariate") + ggplot2::xlim(min_val, max_val) + diff --git a/R/trimWeights.R b/R/trimWeights.R index d3465f35..763eaeea 100644 --- a/R/trimWeights.R +++ b/R/trimWeights.R @@ -145,7 +145,7 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v } # Save histogram of new weights - p <- ggplot2::ggplot(as.data.frame(t), aes(x = t)) + + p <- ggplot2::ggplot(as.data.frame(t), ggplot2::aes(x = t)) + ggplot2::geom_histogram(color = 'black', bins = 15) if(verbose){ @@ -181,9 +181,9 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v } # Save histogram of new weights - p <- ggplot2::ggplot(as.data.frame(t), aes(x = t)) + + p <- ggplot2::ggplot(as.data.frame(t), ggplot2::aes(x = t)) + ggplot2::geom_histogram(color = 'black', bins = 15) + - ggtitle(paste0("Weights trimmed at the ", quantile, "th value")) + ggplot2::ggtitle(paste0("Weights trimmed at the ", quantile, "th value")) if(verbose){ print(p) diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 8a48982d..3519bcd9 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -56,7 +56,7 @@ library(devMSMsHelpers) #prior to re-installing using the code above. Sorry, this is annoying! 
#conducting package checks & tests from documentation --just for IS to run; comment out when testing the workflow -# devtools::check() +devtools::check() ``` @@ -198,9 +198,9 @@ data <- mice::complete(imputed_data, 1) #just for testing purposes #IS testing multi-level factors data$"PmMrSt2" = as.factor(data$"PmMrSt2") data$HomeOwnd = as.factor(data$HomeOwnd) -# library(dplyr) -# data <- data %>% -# dplyr::select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood", "pcx")))) #removing these --came from old dataset +library(dplyr) +data <- data %>% + dplyr::select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood", "pcx")))) #removing these --came from old dataset ``` diff --git a/man/make_love_plot.Rd b/man/make_love_plot.Rd index 0e648a1c..d900c5ae 100644 --- a/man/make_love_plot.Rd +++ b/man/make_love_plot.Rd @@ -91,7 +91,6 @@ b <- assessBalance(data = test, exposure = "A", exposure_time_pts = c(1, 2, 3), outcome = "D.3", - tv_confounders = c("A.1", "A.2", "A.3", "B.1", "B.2", "B.3"), type = "weighted", weights = w, formulas = f, From 557b202723b397479e1f7390ef1e49af5237b467 Mon Sep 17 00:00:00 2001 From: "Isabella Stallworthy, PhD" Date: Wed, 27 Sep 2023 11:43:53 -0400 Subject: [PATCH 08/12] noah's push edits --- R/calcBalStats.R | 87 +++++++++++++++++++++++++++-------------- R/compareHelpers.R | 7 ++++ R/createFormulas.R | 58 ++++++++++++++++----------- R/eval_hist.R | 97 ++++++++++++++++++++++++++++++++-------------- R/fitModel.R | 40 ++++++++++++++++--- R/getModel.R | 27 ++++++++----- R/make_love_plot.R | 33 +++++++++------- R/trimWeights.R | 74 ++++++++++++++++++++++++----------- 8 files changed, 291 insertions(+), 132 deletions(-) diff --git a/R/calcBalStats.R b/R/calcBalStats.R index 3b9ac797..edea6437 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -247,11 +247,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ cat(paste0("**Imputation ", k, "**"), "\n") } - #summarize contributors to each history - # prop_sum <- prop_weights %>% - # dplyr::group_by(as.factor(history)) %>% - # dplyr::summarize(n = dplyr::n()) - prop_sum <- aggregate(exposure ~ as.factor(history), data = prop_weights, FUN = function(x) n = length(x)) @@ -267,14 +262,20 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ ommitted_histories <- as.character(as.data.frame(prop_sum)[prop_sum$n == 1, 1]) if (data_type == "imputed"){ - cat(paste0("USER ALERT: the following history/histories, ", ommitted_histories, - ", has/have been omitted from balance checking for exposure ", exposure, - ", imputation ", k, ", at time point ", exposure_time_pt)) + # cat(paste0("USER ALERT: the following history/histories, ", ommitted_histories, + # ", has/have been omitted from balance checking for exposure ", exposure, + # ", imputation ", k, ", at time point ", exposure_time_pt)) + cat(sprintf("USER ALERT: the following history/histories, %s has/have been omitted from + balance checking for exposure %s imputation %s at time point %s:", + ommitted_histories, exposure, k, exposure_time_pt)) } else{ - cat(paste0("USER ALERT: the following history/histories, ", ommitted_histories, - ", has/have been omitted from balance checking for exposure ", exposure, - " at time point ", exposure_time_pt)) + # cat(paste0("USER ALERT: the following history/histories, ", ommitted_histories, + # ", has/have been omitted from balance checking for exposure ", exposure, + # " at time point ", exposure_time_pt)) + cat(sprintf("USER ALERT: the
following history/histories, %s has/have been omitted from balance checking for exposure %s at time point %s:", ommitted_histories, exposure, exposure_time_pt)) } temp <- temp[!temp$history %in% ommitted_histories, , drop = FALSE] @@ -504,13 +505,22 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (verbose & save.out) { if (data_type == "imputed"){ - cat(paste0("For each time point and imputation, ", gsub("/", "", folder), " summary plots for ", - form_name, " formulas weighting method ", - weights_method, " have now been saved in the '", folder, "plots/' folder."), "\n") + # cat(paste0("For each time point and imputation, ", gsub("/", "", folder), " summary plots for ", + # form_name, " formulas weighting method ", + # weights_method, " have now been saved in the '", folder, "plots/' folder."), "\n") + + cat(sprintf("For each time point and imputation, %s summary plots for %s + formulas weighting method %s have now been saved in the '%splots/' folder.\n", + gsub("/", "", folder), form_name, weights_method, folder)) + } - else {cat(paste0(" For each time point, ", gsub("/", "", folder), " summary plots for ", - form_name, " formulas and weighting method ", - weights_method, " have now been saved in the '", folder, "plots/' folder."), "\n") + else { + # cat(paste0(" For each time point, ", gsub("/", "", folder), " summary plots for ", + # form_name, " formulas and weighting method ", + # weights_method, " have now been saved in the '", folder, "plots/' folder."), "\n") + cat(sprintf("For each time point, %s summary plots for %s + formulas weighting method %s have now been saved in the '%splots/' folder.\n", + gsub("/", "", folder), form_name, weights_method, folder)) } } @@ -525,25 +535,42 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (save.out){ - write.csv(bal_summary_exp, paste0(home_dir, "/balance/", folder, form_name, "_", exposure, "_", k, "_", - weights_method, "_balance_stat_summary.csv")) - write.csv(all_prop_weights, paste0(home_dir, "/balance/", folder, "/", form_name, "_form_", exposure, "_", k, - "_", weights_method, "_history_sample_weight.csv")) + write.csv(bal_summary_exp, + # paste0(home_dir, "/balance/", folder, form_name, "_", exposure, "_", k, "_", + # weights_method, "_balance_stat_summary.csv")) + sprintf("%s/balance/%s%s_%s_%s_%s_balance_stat_summary.csv", + home_dir,folder, form_name, exposure, k, weights_method)) + + write.csv(all_prop_weights, + # paste0(home_dir, "/balance/", folder, "/", form_name, "_form_", exposure, "_", k, + # "_", weights_method, "_history_sample_weight.csv")) + sprintf("%s/balance/%s/%s_form_%s_%s_%s_history_sample_weight.csv", + home_dir, folder, form_name, exposure, k, weights_method)) + if (verbose){ cat("\n") if (data_type == "imputed"){ - cat(paste0("Balance statistics using ", form_name, " formulas for ", exposure, ", imputation ", k, ", using ", - weights_method, " have been saved in the 'balance/", folder, "' folder"), "\n") - cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure, ", imputation ", k, - " have been saved in the 'balance/", folder, "' folder"), "\n") + cat(sprintf("Balance statistics using %s formulas for %s imputation %s, using + %s have been saved in the 'balance/%s' folder.
\n", + form_name, exposure, k, weights_method, folder)) + + # cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure, ", imputation ", k, + # " have been saved in the 'balance/", folder, "' folder"), "\n") + cat(sprintf("Sampling weights using the %s for %s imputation %s have been saved in the 'balance/%s' folder., \n", + form_name, exposure, k, folder)) } else{ - cat(paste0("Balance statistics using ", form_name, " formulas for ", exposure, "using ", - weights_method, " have been saved in the 'balance/", folder, "' folder"), "\n") - - cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure, - " have been saved in the 'balance/", folder, "' folder"), "\n") + # cat(paste0("Balance statistics using ", form_name, " formulas for ", exposure, "using ", + # weights_method, " have been saved in the 'balance/", folder, "' folder"), "\n") + cat(sprintf("Balance statistics using %s formulas for %s using + %s have been saved in the 'balance/%s' folder. \n", + form_name, exposure, weights_method, folder)) + + # cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure, + # " have been saved in the 'balance/", folder, "' folder"), "\n") + cat(sprintf("Sampling weights using the %s for %s have been saved in the 'balance/%s' folder., \n", + form_name, exposure, folder)) } cat("\n") } diff --git a/R/compareHelpers.R b/R/compareHelpers.R index 68973bfd..1779fdea 100644 --- a/R/compareHelpers.R +++ b/R/compareHelpers.R @@ -17,9 +17,11 @@ #' reference = "h-h-h" ) get_reference_values <- function(d, reference) { + ref_vals <- sapply(seq_len(length(unlist(strsplit(reference, "-")))), function(x) { d[x, unlist(strsplit(reference, "-"))[x]] }) + ref_vals } @@ -41,11 +43,13 @@ get_reference_values <- function(d, reference) { #' r <- get_comparison_values(d = d, #' comp_histories = c("h-h-h", "h-h-l")) get_comparison_values <- function(d, comp_histories) { + comp_vals <- sapply(comp_histories, function(comp) { sapply(seq_len(length(unlist(strsplit(comp, "-")))), function(x) { d[x, unlist(strsplit(comp, "-"))[x]] }) }) + t(comp_vals) } @@ -60,6 +64,7 @@ get_comparison_values <- function(d, comp_histories) { #' @return contrasts #' @export create_custom_contrasts <- function(d, reference, comp_histories, exposure, preds) { + if (is.na(reference) | is.null(comp_histories)) { return(NULL) # Invalid input, return early } @@ -121,6 +126,7 @@ create_custom_comparisons <- function(preds, ref_vals, comp_vals, exposure) { #' @export add_histories <- function(p, d) { + if((is.list(p)) & length(p) == 1){ history <- matrix(data = NA, nrow = nrow(p[[1]]), ncol = 1) # Get histories from the first element p <- p[[1]] @@ -150,6 +156,7 @@ add_histories <- function(p, d) { } else { #comps for (i in seq_len(nrow(p))) { + temp <- as.character(p$term[i]) pair <- lapply(1:2, function(y) { a <- sapply(strsplit(temp, " - "), "[", y) diff --git a/R/createFormulas.R b/R/createFormulas.R index 9e0210dc..8fb1bef2 100644 --- a/R/createFormulas.R +++ b/R/createFormulas.R @@ -207,49 +207,50 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, #identifying lagged tv confounders relative to time point based on formula type if (type == "full"){ + if(verbose){ message("USER ALERT: Please manually inspect the full balancing formula below:") } + time_var_include <- time_varying_covariates[as.numeric(sapply(strsplit(time_varying_covariates, "\\."), "[", 2)) < time] } else if (type == "short"){ + if(verbose){ message("USER ALERT: Please manually inspect the short 
balancing formula below that includes time-varying confounders at t-1 only:") } time_var_include <- time_varying_covariates[as.numeric(sapply(strsplit(time_varying_covariates, "\\."), "[", 2)) == - exposure_time_pts[x-1]] + exposure_time_pts[x - 1]] } else if (type == "update"){ + if(is.null(bal_stats)){ - stop("Please provide balance statistics from the assessBalance() function if you wish to run the update version of this function", call. = FALSE) + stop("Please provide balance statistics from the assessBalance() function if you wish to run the update version of this function", + call. = FALSE) } else if (!is.data.frame(bal_stats)){ - stop("Please provide balance statistics from the assessBalance() function if you wish to run the update version of this function", call. = FALSE) - + stop("Please provide balance statistics from the assessBalance() function if you wish to run the update version of this function", + call. = FALSE) } if(verbose){ - message("USER ALERT: Please manually inspect the updated balancing formula below that includes time-varying confounders at t-1 and those greater at further lags that remained imbalanced:") + message("USER ALERT: Please manually inspect the updated balancing formula below that includes time-varying confounders at t-1 and + those greater at further lags that remained imbalanced:") } time_var_include <- time_varying_covariates[as.numeric(sapply(strsplit(time_varying_covariates, "\\."), "[", 2)) == - exposure_time_pts[x-1]] + exposure_time_pts[x - 1]] if (x > 1) { - # new <- bal_stats %>% - # dplyr::filter(balanced == 0) %>% - # dplyr::filter(exp_time == time, as.numeric(covar_time) < exposure_time_pts[x - 1], - # as.numeric(covar_time) > 0) %>% # Finds any lagged imbalanced covars - # dplyr::select(covariate) new <- bal_stats[bal_stats$balanced == 0 & bal_stats$exp_time == time & - as.numeric(bal_stats$covar_time) < exposure_time_pts[x-1] & + as.numeric(bal_stats$covar_time) < exposure_time_pts[x - 1] & as.numeric(bal_stats$covar_time) > 0, ] @@ -262,15 +263,20 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, time_var_include <- c(time_var_include, new) if (verbose){ - cat(paste0("For ", exposure, " at exposure time point ", time , - ", the following covariate(s) will be added to the short balancing formula: "), paste(new, collapse = ", "), "\n") + # cat(paste0("For ", exposure, " at exposure time point ", time , + # ", the following covariate(s) will be added to the short balancing formula: "), paste(new, collapse = ", "), "\n") + cat(sprintf("For %s at exposure time point %s the following covariate(s) will be added to the short balancing formula: %s \n", + exposure, time, paste(new, collapse = ", "))) + cat("\n") } } else{ if (verbose) { - cat(paste0("For ", exposure, " at exposure time point ", time , - " no time-varying confounders at additional lags were added."), "\n") + # cat(paste0("For ", exposure, " at exposure time point ", time , + # " no time-varying confounders at additional lags were added."), "\n") + cat(sprintf("For %s at exposure time point %s no time-varying confounders at additional lags were added. 
\n", + exposure, time )) cat("\n") } } @@ -320,15 +326,17 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, # Prints form for user inspection if (verbose){ - cat(paste0("The ", type, " formula for ", exposure, "-", outcome, " at ", exposure, " time point ", - as.character(time), " is:"), "\n") + cat(sprintf("The %s formula for %s - %s at %s time point %s is: \n", + type, exposure, outcome, exposure, as.character(time))) print(f) cat("\n") } # Appends the form string to forms_csv - forms_csv <- c(forms_csv, paste0(type, " formula for ", exposure, "-", outcome, " at ", exposure, - " time point ", as.character(time), ":")) + forms_csv <- c(forms_csv, + sprintf("%s formula for %s-%s at %s time point %s:", + type, exposure, outcome, exposure, as.character(time))) + forms_csv <- c(forms_csv, paste(exposure, "~", paste0(vars_to_include[order(vars_to_include)], sep = "", collapse = " + "))) # Assigns the form to forms list @@ -337,11 +345,17 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, if(save.out){ # Writes forms_csv to a CSV file - forms_csv_file <- paste0(forms_dir, "/", type, "_", exposure, "-", outcome, "_", type, "_balancing_formulas.csv") + forms_csv_file <- + # paste0(forms_dir, "/", type, "_", exposure, "-", outcome, "_", type, "_balancing_formulas.csv") + sprintf("%s/%s_%s-%s_%s_balancing_formulas.csv", + forms_dir, type, exposure, outcome, type) + writeLines(forms_csv, con = forms_csv_file) # writes to rds - forms_rds_file <- paste0(forms_dir, "/", type, "_", exposure, "-", outcome, "_", type, "_balancing_formulas.rds") + # forms_rds_file <- paste0(forms_dir, "/", type, "_", exposure, "-", outcome, "_", type, "_balancing_formulas.rds") + forms_rds_file <- sprintf("%s/%s_%s-%s_%s_balancing_formulas.rds", + forms_dir, type, exposure, outcome, type) saveRDS(forms, file = forms_rds_file) } diff --git a/R/eval_hist.R b/R/eval_hist.R index 3d2d378b..55fa3f8a 100644 --- a/R/eval_hist.R +++ b/R/eval_hist.R @@ -60,7 +60,9 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, ref = NA, comps = NULL, verbose = TRUE){ - exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', time_pts[1])], "numeric"), "continuous", "binary") + # exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', time_pts[1])], "numeric"), "continuous", "binary") + exposure_type <- if (inherits(data[, paste0(exposure, '.', time_pts[1])], "numeric")) "continuous" else "binary" + data_wide <- data @@ -72,10 +74,10 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, new <- data[, c("ID", paste(exposure, time_pts, sep = "."))] } else{ - #new will have cols for epochs new <- data.frame(ID = data_wide[, "ID"]) colnames(new) <- "ID" + # Averages exposure across time points that constitute the exposure epochs (e.g., infancy = 6 & 15) for (e in seq_len(nrow(epochs))) { epoch <- epochs[e, 1] @@ -90,23 +92,18 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, z <- as.numeric(as.character(unlist(z[, cols]))) temp <- cbind(temp, z) } - # new <- new %>% x <- as.data.frame(rowMeans(temp, na.rm = TRUE)) colnames(x) <- c(new_var) new <- cbind(new, x) - # dplyr::mutate(!!new_var := rowMeans(temp, na.rm = TRUE)) } } if(verbose){ cat("Summary of Exposure Main Effects:", "\n") summary(new[, !colnames(new) %in% "ID"]) - # print(psych::describe(new %>% select(-c("ID")), fast = TRUE)) cat("\n") - } - tot_hist <- apply(gtools::permutations(2, nrow(epochs), c("l", "h"), 
repeats.allowed = TRUE), 1, paste, sep = "", collapse = "-") @@ -115,15 +112,17 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, tot_hist <- tot_hist[tot_hist %in% c(ref, comps)] } - epochs$epochs <- as.character(epochs$epochs) - if (exposure_type == "continuous"){ + if(is.null(hi_lo_cut)){ + # use median as hi/lo split (default) new$history <- lapply(seq_len(nrow(new)), function(x) { + paste(lapply(seq_len(nrow(epochs)), function(y) { + if (is.na(new[x, y + 1])) { return(NA) } @@ -135,9 +134,11 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, } }), collapse = "-") }) + } else{ #user-supplied values if (length(hi_lo_cut) == 2){ + hi_cutoff <- hi_lo_cut[1] lo_cutoff <- hi_lo_cut[2] @@ -152,12 +153,15 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, if (hi_lo_cut > 1 || hi_lo_cut < 0) { stop('Please select a hi_lo cutoff value between 0 and 1', call. = FALSE) } + hi_cutoff <- hi_lo_cut lo_cutoff <- hi_lo_cut } new$history <- lapply(seq_len(nrow(new)), function(x) { + paste(lapply(seq_len(nrow(epochs)), function(y) { + if (is.na(new[x, y + 1])) { return(NA) } @@ -175,8 +179,11 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, } else if (exposure_type == "binary"){ + new$history <- lapply(seq_len(nrow(new)), function(x) { + paste(lapply(seq_len(nrow(epochs)), function(y) { + if (is.na(new[x, y + 1])) { return(NA) } @@ -186,21 +193,18 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, if (new[x, y + 1] == 0) { return("l") } + }), collapse = "-") + }) } # Summarizing n's by history - # his_summ <- new %>% - # dplyr::group_by(history) %>% - # dplyr::summarize(n = dplyr::n()) new$history <- unlist(new$history) his_summ <- aggregate( ID ~ history, data = new, FUN = function(x) n = length(x)) if( !is.na(ref) & !is.null(comps)){ - # his_summ <- his_summ %>% - # dplyr::filter(history %in% c(ref, comps)) his_sum <- his_summ[his_summ$history %in% c(ref, comps), ] } @@ -209,32 +213,65 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, if(verbose){ if(!is.null(hi_lo_cut)){ - cat(paste0("USER ALERT: Out of the total of ", nrow(data_wide), " individuals in the sample, below is the distribution of the ", sum(his_summ$n), " (", - round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), "%) that fall into ", nrow(his_summ), " out of the ", length(tot_hist), - " the total user-defined exposure histories created from ", - hi_lo_cut[2] * 100, "th and ", hi_lo_cut[1] * 100, "th percentile values for low and high levels of exposure ", exposure, - ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") + # cat(paste0("USER ALERT: Out of the total of ", nrow(data_wide), " individuals in the sample, below is the distribution of the ", sum(his_summ$n), " (", + # round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), "%) that fall into ", nrow(his_summ), " out of the ", length(tot_hist), + # " the total user-defined exposure histories created from ", + # hi_lo_cut[2] * 100, "th and ", hi_lo_cut[1] * 100, "th percentile values for low and high levels of exposure ", exposure, + # ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") + + cat(sprintf("USER ALERT: Out of the total of %s individuals in the sample, below is the distribution of the %s (%s%%) + that fall into %s out of the %s the total user-defined exposure histories created from + %sth and %sth percentile values for low and high 
levels of exposure %s, respectively, across %s. \n", + nrow(data_wide), + sum(his_summ$n), + round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), + nrow(his_summ), + length(tot_hist), + hi_lo_cut[2] * 100, + hi_lo_cut[1] * 100, + exposure, + paste(epochs$epochs, collapse = ", "))) + } else{ - cat(paste0("USER ALERT: Out of the total of ", nrow(data_wide), " individuals in the sample, below is the distribution of the ", sum(his_summ$n), " (", - round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), "%) that fall into ", nrow(his_summ), " out of the ", length(tot_hist), - " the total user-defined exposure histories created from median split values for low and high levels of exposure ", exposure, - ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") + # cat(paste0("USER ALERT: Out of the total of ", nrow(data_wide), " individuals in the sample, below is the distribution of the ", sum(his_summ$n), " (", + # round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), "%) that fall into ", nrow(his_summ), " out of the ", length(tot_hist), + # " the total user-defined exposure histories created from median split values for low and high levels of exposure ", exposure, + # ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") + + cat(sprintf("USER ALERT: Out of the total of %s individuals in the sample, below is the distribution of the %s (%s%%) that fall into %s + out of the %s total user-defined exposure histories created from median split values for low and high levels of exposure + %s, respectively, across %s. \n", + nrow(data_wide), + sum(his_summ$n), + round((sum(his_summ$n) / nrow(data_wide)) * 100, 2), + nrow(his_summ), + length(tot_hist), + exposure, + paste(epochs$epochs, collapse = ", "))) } - cat("USER ALERT: Please inspect the distribution of the sample across the following exposure histories and ensure there is sufficient spread to avoid extrapolation and low precision:", "\n") + cat("USER ALERT: Please inspect the distribution of the sample across the following exposure histories and ensure there is + sufficient spread to avoid extrapolation and low precision:", "\n") if (nrow(his_summ) != length(tot_hist)) { - cat(paste0("USER ALERT: There are no individuals in your sample that fall into ", - paste(tot_hist[!tot_hist %in% his_summ$history], collapse = " & "), - " exposure history/histories. You may wish to consider different high/low cutoffs (for continuous exposures), alternative epochs, or choose a different measure to avoid extrapolation."), "\n") + # cat(paste0("USER ALERT: There are no individuals in your sample that fall into ", + # paste(tot_hist[!tot_hist %in% his_summ$history], collapse = " & "), + # " exposure history/histories. You may wish to consider different high/low cutoffs (for continuous exposures), alternative epochs, or choose a different measure to avoid extrapolation."), "\n") + cat(sprintf("USER ALERT: There are no individuals in your sample that fall into %s exposure history/histories. 
+ You may wish to consider different high/low cutoffs (for continuous exposures), alternative epochs, or choose a different measure to avoid extrapolation.\n", + paste(tot_hist[!tot_hist %in% his_summ$history], collapse = " & "))) cat("\n") } cat("\n") - cat(knitr::kable(his_summ, caption = paste0("Summary of user-specified exposure ", exposure, - " histories based on exposure main effects ", paste(epochs$epochs, collapse = ", "), - " containing time points ", paste(epochs$values, collapse = ", "), ":"), + cat(knitr::kable(his_summ, caption = + # paste0("Summary of user-specified exposure ", exposure, + # " histories based on exposure main effects ", paste(epochs$epochs, collapse = ", "), + # " containing time points ", paste(epochs$values, collapse = ", "), ":"), + sprintf("Summary of user-specified exposure %s histories based on exposure main effects %s + containing time points %s:", + exposure, paste(epochs$epochs, collapse = ", "), paste(epochs$values, collapse = ", ")), format = 'pipe', row.names = F), sep = "\n") } diff --git a/R/fitModel.R b/R/fitModel.R index 6340a7b9..bfbebf51 100644 --- a/R/fitModel.R +++ b/R/fitModel.R @@ -220,9 +220,12 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco # Lists out exposure-epoch combos if( is.null(epochs)){ #making epochs time pts if not specified by user + epochs <- data.frame(epochs = as.character(exposure_time_pts), values = exposure_time_pts) + } else{ + if( !is.data.frame(epochs) | ncol(epochs) != 2 | sum(colnames(epochs) == c("epochs", "values")) != ncol(epochs)){ stop("If you supply epochs, please provide a dataframe with two columns of epochs and values.", call. = FALSE) @@ -251,20 +254,25 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco # } if (mice::is.mids(data)){ #imputed dataset + fits <- lapply(seq_len(data$m), function(y) { + d <- mice::complete(data, y) d$weights <- NULL d$weights <- weights[[y]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model, family, covariates, verbose) + }) fits.null <- lapply(seq_len(data$m), function(y) { + d <- mice::complete(data, y) d$weights <- NULL d$weights <- weights[[y]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model = n, family, covariates, verbose) + }) if (verbose){ @@ -281,17 +289,21 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco else if (inherits(data, "list")){ #imputed dataset fits <- lapply(seq_len(length(data)), function(y) { + d <- data[[y]] d$weights <- NULL d$weights <- weights[[y]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model, family, covariates, verbose) + }) fits.null <- lapply(seq_len(length(data)), function(y) { + d <- data[[y]] d$weights <- NULL d$weights <- weights[[y]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model = n, family, covariates, verbose) + }) if (verbose){ @@ -307,17 +319,21 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco else if (is.data.frame(data)){ #df fits <- lapply(1, function(y) { + d <- data d$weights <- NULL d$weights <- weights[["0"]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs, int_order, model, family, covariates, verbose) + }) fits.null <- lapply(1, function(y) { + d <- data d$weights <- NULL d$weights <- weights[["0"]]$weights getModel(d, exposure, exposure_time_pts, outcome, epochs, 
exp_epochs, int_order, model = n, family, covariates, verbose) + }) if (verbose){ @@ -339,16 +355,23 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco names(fits) <- seq_len(length(fits)) if (verbose){ - cat(paste0("The marginal model, ", model, ", run for each imputed dataset is summarized below:"), "\n") + + # cat(paste0("The marginal model, ", model, ", run for each imputed dataset is summarized below:"), "\n") + sprintf("The marginal model, %s run for each imputed dataset is summarized below: \n", + model) + print(suppressWarnings(jtools::export_summs( - fits, statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), + fits, + statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), model.names = c(paste0("Imp.", seq_len(length(fits)))) ))) } if(save.out){ suppressWarnings(jtools::export_summs( - fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), + fits, + to.file = "docx", + statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), model.names = c(paste0("Imp.", seq_len(length(fits)))), file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")) )) @@ -360,13 +383,17 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if (verbose){ require(survey) cat(paste0("The marginal model, ", model, ", is summarized below:"), "\n") - print(suppressWarnings(jtools::export_summs(fits, statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared")))) + print(suppressWarnings(jtools::export_summs( + fits, + statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared")))) } if (save.out){ # requireNamespace(officer) #is there another way to do this? required for writing to word # requireNamespace(flextable) # " " - suppressWarnings(jtools::export_summs(fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), + suppressWarnings(jtools::export_summs(fits, + to.file = "docx", + statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")))) } @@ -375,7 +402,8 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if(save.out){ - saveRDS(fits, file = file.path(home_dir, "/models/", paste0(exposure, "-", outcome, "_", model, "_model.rds"))) + saveRDS(fits, + file = file.path(home_dir, "/models/", paste0(exposure, "-", outcome, "_", model, "_model.rds"))) cat("\n") if (verbose){ diff --git a/R/getModel.R b/R/getModel.R index e25a3329..31ba5656 100644 --- a/R/getModel.R +++ b/R/getModel.R @@ -93,8 +93,6 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs temp <- cbind(temp, z) } #adds a new variable of the exposure averaged within epoch - # d <- d %>% dplyr::mutate(!!new_var := rowMeans(temp, na.rm=T)) - x <- as.data.frame(rowMeans(temp, na.rm = TRUE)) colnames(x) <- c(new_var) d <- cbind(d, x) @@ -133,7 +131,6 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs name <- gsub(" ", "", unlist(strsplit(interactions, "\\+"))[x]) if (! name %in% colnames(d)){ temp <- d[, c(gsub(" ", "", as.character(unlist(strsplit(unlist(strsplit(interactions, "\\+"))[x], "\\:"))))) ] - # d <- d %>% dplyr::mutate(!! 
name := matrixStats::rowProds(as.matrix(temp), na.rm = T)) new <- matrixStats::rowProds(as.matrix(temp), na.rm = T) names(new) <- name d <- cbind(d, new) @@ -156,19 +153,25 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs # Fitting intercept-only model if (model == "int"){ fi <- paste(outcome, "~ 1") - mi <- survey::svyglm(as.formula(fi), family = fam, design = s) + mi <- survey::svyglm(as.formula(fi), + family = fam, + design = s) return(mi) } else if (model == "covs"){ fc <- paste(outcome, "~", covariate_list) - mc <- survey::svyglm(as.formula(fc), family = fam, design = s) + mc <- survey::svyglm(as.formula(fc), + family = fam, + design = s) return(mc) } # Fitting baseline model w/ main effects only (m0) for all models f0 <- paste(outcome, "~", paste0(exp_epochs, sep = "", collapse = " + ")) - m0 <- survey::svyglm(as.formula(f0), family = fam, design = s) + m0 <- survey::svyglm(as.formula(f0), + family = fam, + design = s) if (model == "m0") { return(m0) # Save model @@ -177,7 +180,9 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs # Baseline + sig covar model OR baseline + sig covar + int model if (model == "m1" | model == "m3") { f1 <- paste(f0, "+", covariate_list) # Baseline + covariate model - m1 <- survey::svyglm(as.formula(f1), family = fam, design = s) + m1 <- survey::svyglm(as.formula(f1), + family = fam, + design = s) # Baseline + imbalanced covars if (model == "m1") { @@ -192,7 +197,9 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs # Baseline + interactions if (model == "m2") { # Fitting m2 - m2 <- survey::svyglm(as.formula(f2), family = fam, design = s) + m2 <- survey::svyglm(as.formula(f2), + family = fam, + design = s) # Baseline + interactions return(m2) } @@ -202,7 +209,9 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs # Fitting m3 f3 <- paste0(f1, "+", paste(interactions, sep = "", collapse = " + ")) f3 <- as.formula(f3) - m3 <- survey::svyglm(f3, family = fam, design = s) + m3 <- survey::svyglm(f3, + family = fam, + design = s) # Baseline + covars+ interactions return(m3) diff --git a/R/make_love_plot.R b/R/make_love_plot.R index ccb0af08..b9491508 100644 --- a/R/make_love_plot.R +++ b/R/make_love_plot.R @@ -88,20 +88,20 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur stat_var <- colnames(balance_stats)[grepl("_bal", colnames(balance_stats))] colnames(balance_stats)[colnames(balance_stats) == stat_var] <- "avg_bal" # balance_stats <- balance_stats %>% dplyr::arrange(avg_bal) - balance_stats <- balance_stats[order(balance_stats$avg_bal), ] + balance_stats <- balance_stats[order(balance_stats$avg_bal), , drop = FALSE] - x_lab <- ifelse(exposure_type == "continuous", "Correlation with Exposure", "Standardized Mean Difference Between Exposures") + x_lab <- if(exposure_type == "continuous") "Correlation with Exposure" else "Standardized Mean Difference Between Exposures" - labels <- ifelse(balance_stats$balanced == 0, balance_stats$covariate, "") + labels <- if(sum(balance_stats$balanced == 0) > 0) balance_stats$covariate else "" - min_val <- ifelse(min(balance_stats[, "avg_bal"]) < 0, min(balance_stats[, "avg_bal"]) - 0.05, min(balance_thresh) - 0.05) + min_val <- if(min(balance_stats[, "avg_bal"]) < 0) min(balance_stats[, "avg_bal"]) - 0.05 else min(balance_thresh) - 0.05 if (min_val > -(max(balance_thresh))){ - min_val = -(max(balance_thresh)) - 0.05 #to make sure user-supplied balance thresh is 
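The two derived-variable steps in getModel() above (epoch means via rowMeans(), interaction terms via matrixStats::rowProds()) can be sketched on a toy data frame; the variable names below are illustrative only:

d <- data.frame(exp.6 = c(1, 2, NA), exp.15 = c(2, 4, 6), covar = c(0, 1, 1))

# exposure averaged within an "early" epoch spanning time points 6 and 15
d$exp.early <- rowMeans(d[, c("exp.6", "exp.15")], na.rm = TRUE)

# an interaction term built as the row-wise product of its components
d$exp.early.covar <- matrixStats::rowProds(as.matrix(d[, c("exp.early", "covar")]), na.rm = TRUE)
d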
on the figure + min_val <- -(max(balance_thresh)) - 0.05 #to make sure user-supplied balance thresh is on the figure } - max_val <- ifelse(max(balance_stats[, "avg_bal"]) > 0, max(balance_stats[, "avg_bal"]) + 0.05, max(balance_thresh) + 0.05) + max_val <- if(max(balance_stats[, "avg_bal"]) > 0) max(balance_stats[, "avg_bal"]) + 0.05 else max(balance_thresh) + 0.05 if (max_val < max(balance_thresh)){ - max_val = max(balance_thresh) + 0.05 #to make sure user-supplied balance thresh is on the figure + max_val <- max(balance_thresh) + 0.05 #to make sure user-supplied balance thresh is on the figure } # Make love plot per exposure time point @@ -143,18 +143,25 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur lp <- lp + ggplot2::ggtitle(paste0(exposure, " (t = ", exposure_time_pt, ") Balance for Imputation ", k)) if(save.out){ - suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", folder, "plots/", - form_name, "_imp_", k, "_", exposure, "_", exposure_time_pt, "_", - weights_method, "_summary_balance_plot.jpeg"), width = 6, height = 8)) + suppressMessages(ggplot2::ggsave(lp, filename = + # paste0(home_dir, "/balance/", folder, "plots/", + # form_name, "_imp_", k, "_", exposure, "_", exposure_time_pt, "_", + # weights_method, "_summary_balance_plot.jpeg") + sprintf("%s/balance/%splots/%s_imp_%s_%s_%s_%s_summary_balance_plot.jpeg", + home_dir, folder, form_name, k, exposure, exposure_time_pt, weights_method), + width = 6, height = 8)) } } else { lp <- lp + ggplot2::ggtitle(paste0(exposure, " (t = ", exposure_time_pt, ") Balance")) if(save.out){ - suppressMessages(ggplot2::ggsave(lp, filename = paste0(home_dir, "/balance/", folder, "plots/", form_name, "_", exposure, "_", - exposure_time_pt, "_", weights_method, "_summary_balance_plot.jpeg"), - width = 6, height = 8)) + suppressMessages(ggplot2::ggsave(lp, filename = + # paste0(home_dir, "/balance/", folder, "plots/", form_name, "_", exposure, "_", + # exposure_time_pt, "_", weights_method, "_summary_balance_plot.jpeg"), + sprintf("%s/balance/%splots/%s_%s_%s_%s_summary_balance_plot.jpeg", + home_dir, folder, form_name, exposure, exposure_time_pt, weights_method), + width = 6, height = 8)) } } diff --git a/R/trimWeights.R b/R/trimWeights.R index 763eaeea..c30240f5 100644 --- a/R/trimWeights.R +++ b/R/trimWeights.R @@ -7,7 +7,7 @@ #' @seealso [WeightIt::trim()], #' which #' this function wraps -#' @param home_dir path to home directory +#' @param home_dir path to home directory (required if save.out = TRUE) #' @param exposure name of exposure variable #' @param outcome name of outcome variable with ".timepoint" suffix #' @param weights list of IPTW weights output from createWeights() @@ -137,25 +137,37 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v t <- WeightIt::trim(w$weights, at = quantile) if (verbose){ - cat('\n') - cat(paste0("For imputation ", x, " and the ", exposure, "-", outcome, " relation, following trimming at the ", - quantile, " quantile, the median weight value is ", round(median(t), 2) , - " (SD= ", round(sd(t), 2), "; range= ", round(min(t), 2), "-", round(max(t), 2), ")."), "\n") - cat('\n') + cat('\n') + # cat(paste0("For imputation ", x, " and the ", exposure, "-", outcome, " relation, following trimming at the ", + # quantile, " quantile, the median weight value is ", round(median(t), 2) , + # " (SD= ", round(sd(t), 2), "; range= ", round(min(t), 2), "-", round(max(t), 2), ")."), "\n") + cat(sprintf("For imputation %s and the %s-%s 
relation, following trimming at the %s quantile, the median weight value is + %s (SD= %s; range= %s-%s). \n", + x, exposure, outcome, quantile, round(median(t), 2), round(sd(t), 2), round(min(t), 2), round(max(t)))) + cat('\n') } # Save histogram of new weights p <- ggplot2::ggplot(as.data.frame(t), ggplot2::aes(x = t)) + - ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::geom_histogram(color = 'black', bins = 15) + + ggplot2::ggtitle( + # paste0("Weights trimmed at the ", quantile, "th value")) + sprintf("Weights trimmed at the %s th value", quantile)) if(verbose){ print(p) } if(save.out){ - ggplot2::ggsave(paste("Hist_", exposure, "-", outcome, "_", weights[[x]]$method, "_weights_trim_", - quantile, "_imp_", x, ".png", sep = ""), path = paste0(home_dir, "/weights/histograms/"), plot = p, - height = 8, width = 14) + ggplot2::ggsave( + # paste("Hist_%s-%s_%s_weights_trim_%s_imp_%s.png", sep = ""), + sprintf("Hist_%s-%s_%s_weights_trim_%s_imp_%s.png", + exposure, outcome, weights[[x]]$method, quantile, x), + path = + # paste0(home_dir, "/weights/histograms/"), + sprintf("%s/weights/histograms/", home_dir), + plot = p, + height = 8, width = 14) } # w$weights <- NA @@ -173,37 +185,55 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v t <- WeightIt::trim(w$weights, at = quantile) if (verbose){ - cat('\n') - cat(paste0("For the ", exposure, "-", outcome, " relation, following trimming at the ", - quantile, " quantile, the median weight value is ", round(median(t), 2) , - " (SD= ", round(sd(t), 2), "; range= ", round(min(t), 2), "-", round(max(t), 2), ")."), "\n") - cat('\n') + cat('\n') + # cat(paste0("For the ", exposure, "-", outcome, " relation, following trimming at the ", + # quantile, " quantile, the median weight value is ", round(median(t), 2) , + # " (SD= ", round(sd(t), 2), "; range= ", round(min(t), 2), "-", round(max(t), 2), ")."), "\n") + cat(sprintf("For the %s-%s relation, following trimming at the %s quantile, the median weight value is + %s (SD= %s; range= %s-%s). 
\n", + exposure, outcome, quantile, round(median(t), 2), round(sd(t), 2), round(min(t), 2), round(max(t)))) + cat('\n') } # Save histogram of new weights p <- ggplot2::ggplot(as.data.frame(t), ggplot2::aes(x = t)) + ggplot2::geom_histogram(color = 'black', bins = 15) + - ggplot2::ggtitle(paste0("Weights trimmed at the ", quantile, "th value")) + ggplot2::ggtitle( + # paste0("Weights trimmed at the ", quantile, "th value")) + sprintf("Weights trimmed at the %sth value", quantile)) if(verbose){ print(p) } if(save.out){ - ggplot2::ggsave(paste("Hist_", exposure, "-", outcome,"_",weights[[x]]$method, "_weights_trim_", quantile, ".png", sep = ""), plot = p, - path = paste0(home_dir, "/weights/histograms/"), height = 8, width = 14) + # ggplot2::ggsave(paste("Hist_", exposure, "-", outcome,"_",weights[[x]]$method, "_weights_trim_", quantile, ".png", sep = ""), plot = p, + # path = paste0(home_dir, "/weights/histograms/"), height = 8, width = 14) + + ggplot2::ggsave( + # paste("Hist_%s-%s_%s_weights_trim_%s_imp_%s.png", sep = ""), + sprintf("Hist_%s-%s_%s_weights_trim_%s.png", + exposure, outcome, weights[[x]]$method, quantile), + path = + # paste0(home_dir, "/weights/histograms/"), + sprintf("%s/weights/histograms/", home_dir), + plot = p, + height = 8, width = 14) } - w$weights <- NA - w$weights <- t - w + w$weights <- NA + w$weights <- t + w }) names(trim_weights) <- "0" } if(save.out){ # Save truncated weight data - saveRDS(trim_weights, paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", weights[[1]]$method, "_weights_trim.rds")) + saveRDS(trim_weights, + # paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", weights[[1]]$method, "_weights_trim.rds")) + sprintf("%s/weights/values/%s-%s_%s_weights_trim.rds", + home_dir, exposure, outcome,weights[[1]]$method )) } trim_weights From edbd27b1cc1cc037c6f5fb60f9a1bb19eb90875f Mon Sep 17 00:00:00 2001 From: "Isabella Stallworthy, PhD" Date: Wed, 27 Sep 2023 17:33:28 -0400 Subject: [PATCH 09/12] noah feedback --- R/assessBalance.R | 17 ++-- R/calcBalStats.R | 71 +++++------------ R/compareHelpers.R | 2 +- R/compareHistories.R | 110 ++++++++++++++++---------- R/createFormulas.R | 28 +++---- R/createWeights.R | 154 +++++++++++++++++++++++++------------ R/eval_hist.R | 21 ++--- R/fitModel.R | 48 +++++++----- R/getModel.R | 1 + R/make_love_plot.R | 4 +- R/trimWeights.R | 12 +-- examplePipelineRevised.Rmd | 13 +++- man/make_love_plot.Rd | 2 +- man/trimWeights.Rd | 2 +- 14 files changed, 274 insertions(+), 211 deletions(-) diff --git a/R/assessBalance.R b/R/assessBalance.R index 08855ec9..ab1925a9 100644 --- a/R/assessBalance.R +++ b/R/assessBalance.R @@ -175,34 +175,34 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (missing(type)){ stop("Please supply a 'weighted', 'prebalance' type", call. = FALSE) } - if (!inherits(type, "character") | length(type) != 1 ){ + if (!inherits(type, "character") || length(type) != 1 ){ stop("Please provide a single type as a character string from the following list: 'prebalance', 'weighted'", call. = FALSE) } else if(!type %in% c("prebalance", "weighted")){ stop("Please provide a type from the following list: 'prebalance', 'weighted'", call. = FALSE) } - else if (type == "prebalance" & !is.null(weights)){ + else if (type == "prebalance" && !is.null(weights)){ stop("The 'prebalance' mode of this function assesses balance prior to weighting and thus does not take weights.", call. 
= FALSE) } - else if (type == "weighted" & (is.null(weights) | missing(weights))){ + else if (type == "weighted" && (is.null(weights) || missing(weights))){ stop("The 'weighted' mode of this function requires weights be supplied in the form of output from createWeights.", call. = FALSE) } - if (!is.null(weights) & ! inherits(weights, "list")){ + if (!is.null(weights) && !inherits(weights, "list")){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } if(!is.numeric(balance_thresh)){ stop("Please provide one or two balance thresholds as numbers from 0-1.") } - if (length(balance_thresh) == 2 & is.null(imp_conf)){ + if (length(balance_thresh) == 2 && is.null(imp_conf)){ stop("If you wish to provide different balance threshold for important and less important confounders, please provide a list of important confounders in the 'imp_conf' field.", call. = FALSE) } - if (!is.null(imp_conf) & length(balance_thresh) == 1){ + if (!is.null(imp_conf) && length(balance_thresh) == 1){ stop("If you provide a list of important confounders, please provide a list of two balance thresholds for important and less important confounders, respectively", call. = FALSE) } - else if(!is.null(imp_conf) & !is.character(imp_conf)){ + else if(!is.null(imp_conf) && !is.character(imp_conf)){ stop("Please provide a list variable names as characters that are important confounders.", call. = FALSE) } @@ -220,9 +220,6 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("Please provide a single TRUE or FALSE value to save.out.", call. = FALSE) } - - # folder <- ifelse(type == "prebalance", "prebalance/", "weighted/") - mi <- !is.data.frame(data) folder <- switch(type, "prebalance" = "prebalance/", "weighted/") diff --git a/R/calcBalStats.R b/R/calcBalStats.R index edea6437..3dcce37c 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -81,13 +81,13 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if(!inherits(formulas, "list")){ stop("Please provide a list of formulas for each exposure time point", call. = FALSE) } - if (!is.null(weights) & !inherits(weights, "weightitMSM")){ + if (!is.null(weights) && !inherits(weights, "weightitMSM")){ stop("Please supply a list of weights output from the createWeights function (via WeightIt::WeightItMSM).", call. 
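The `|` to `||` swaps in these argument checks are not only stylistic: `||` and `&&` are scalar, short-circuiting operators intended for if() conditions (recent R versions error when they receive a length-greater-than-one operand), whereas `|` and `&` are elementwise. A minimal sketch of the validation pattern, with a hypothetical argument:

check_single_string <- function(x, what = "exposure") {
  if (!is.character(x) || length(x) != 1) {
    stop(sprintf("Please supply a single %s as a character.", what), call. = FALSE)
  }
  invisible(x)
}
check_single_string("exposureA")        # passes silently
try(check_single_string(c("a", "b")))   # errors with the message above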
= FALSE)
   }
 
   form_name <- sapply(strsplit(names(formulas[1]), "_form"), "[", 1)
-  exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), "continuous", "binary")
-  weighted = ifelse(!is.null(weights), 1, 0)
+  exposure_type <- if(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric")) "continuous" else "binary"
+  weighted <- if(!is.null(weights)) 1 else 0
 
   factor_covariates <- colnames(data)[which(sapply(data, class) == "factor")]
   factor_covariates <- factor_covariates[!factor_covariates %in% "ID"]
@@ -106,7 +106,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
 
   data_type <- if (k == 0) "single" else "imputed"
 
-  if (data_type == "imputed" & verbose){
+  if (data_type == "imputed" && verbose){
     cat(paste0("**Imputation ", k, "**"), "\n")
   }
 
@@ -218,10 +218,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
         if (exp == 0) { # low levels/absent
           if (exposure_type == "continuous") {
             data$flag <- ifelse(data[, exps_time[t]] <= median(data[, paste0(exposure, ".", exposure_time_pt)])
-                                & data$flag == flag, t, NA) # finding those w/ vals <= median exp @ time pt & flagged at prev t's
+                                & data$flag == flag, t, NA) # vectorized & (ifelse() is elementwise); finding those w/ vals <= median exp @ time pt & flagged at prev t's
           }
           else { # for binary exp
-            data$flag <- ifelse(data[, exps_time[t]] == 0 & data$flag == flag, t, NA) # if exposure is absent & flagged at prev t's
+            data$flag <- ifelse(data[, exps_time[t]] == 0 & data$flag == flag, t , NA) # vectorized &; if exposure is absent & flagged at prev t's
           }
         }
 
@@ -229,10 +229,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
         if (exp == 1) { # hi levels/present
           if (exposure_type == "continuous") {
             data$flag <- ifelse(data[, exps_time[t]] > median(data[, paste0(exposure, ".", exposure_time_pt)])
-                                & data$flag == flag, t, NA) # finding those w/ vals > median exp @ time pt & flagged at prev t's
+                                & data$flag == flag, t, NA) # vectorized & (ifelse() is elementwise); finding those w/ vals > median exp @ time pt & flagged at prev t's
           }
           else { # binary exp
-            data$flag <- ifelse(data[, exps_time[t]] == 1 & data$flag == flag, t, NA) # if exposure is present & flagged at prev t's
+            data$flag <- ifelse(data[, exps_time[t]] == 1 & data$flag == flag, t , NA) # vectorized &; if exposure is present & flagged at prev t's
           }
         }
 
@@ -486,7 +486,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
       }
       else{
         bal_stats$bal_thresh <- balance_thresh
-        bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1, 0)
+        bal_stats$balanced <- ifelse(abs(bal_stats$std_bal_stats) < bal_stats$bal_thresh, 1 , 0)
       }
 
       bal_stats$exposure <- exposure
@@ -505,9 +505,6 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_
 
     if (verbose & save.out) {
       if (data_type == "imputed"){
-        # cat(paste0("For each time point and imputation, ", gsub("/", "", folder), " summary plots for ",
-        #            form_name, " formulas weighting method ",
-        #            weights_method, " have now been saved in the '", folder, "plots/' folder."), "\n")
         cat(sprintf("For each time point and imputation, %s summary plots for %s formulas weighting method
                     %s have now been saved in the '%splots/' folder.\n",
                     gsub("/", "", folder), form_name, weights_method, folder))
       }
       else {
-        # cat(paste0(" For each time point, ", gsub("/", "", folder), " summary plots for ",
-        #            form_name, " formulas and weighting method ",
-        #            weights_method, " have now been saved in the '", folder,
"plots/' folder."), "\n") + cat(paste0("For each time point, %s summary plots for %s formulas weighting method %s have now been saved in the %s plots/' folder.\n", gsub("/", "", folder), form_name, weights_method, folder)) @@ -536,14 +531,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (save.out){ write.csv(bal_summary_exp, - # paste0(home_dir, "/balance/", folder, form_name, "_", exposure, "_", k, "_", - # weights_method, "_balance_stat_summary.csv")) sprintf("%s/balance/%s%s_%s_%s_%s_balance_stat_summary.csv", home_dir,folder, form_name, exposure, k, weights_method)) write.csv(all_prop_weights, - # paste0(home_dir, "/balance/", folder, "/", form_name, "_form_", exposure, "_", k, - # "_", weights_method, "_history_sample_weight.csv")) sprintf("%s/balance/%s/%s_form_%s_%s_%s_history_sample_weight.csv", home_dir, folder, form_name, exposure, k, weights_method)) @@ -555,20 +546,15 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ %s have been saved in the 'balance/%s' folder. \n", form_name, exposure, k, weights_method, folder)) - # cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure, ", imputation ", k, - # " have been saved in the 'balance/", folder, "' folder"), "\n") cat(sprintf("Sampling weights using the %s for %s imputation %s have been saved in the 'balance/%s' folder., \n", form_name, exposure, k, folder)) } else{ - # cat(paste0("Balance statistics using ", form_name, " formulas for ", exposure, "using ", - # weights_method, " have been saved in the 'balance/", folder, "' folder"), "\n") + cat(sprintf("Balance statistics using %s formulas for %s using %s have been saved in the 'balance/%s' folder. \n", form_name, exposure, weights_method, folder)) - # cat(paste0("Sampling weights ", "using the ", form_name, " for ", exposure, - # " have been saved in the 'balance/", folder, "' folder"), "\n") cat(sprintf("Sampling weights using the %s for %s have been saved in the 'balance/%s' folder., \n", form_name, exposure, folder)) } @@ -579,9 +565,11 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # tallies total possible COVARIATES FROM FORM FOR ASSESSING BALANCE all_form <- as.data.frame(do.call(rbind, formulas)) tot_covars <- deparse(all_form[, 3], width.cutoff = 300) - tot_covars <- as.character(unlist(strsplit(tot_covars, "\\+")))[!grepl("form", as.character(unlist(strsplit(tot_covars, "\\+"))))] + tot_covars <- as.character(unlist(strsplit(tot_covars, "\\+")))[ + !grepl("form", as.character(unlist(strsplit(tot_covars, "\\+"))))] tot_covars <- gsub(" ", "", tot_covars) - tot_covars <- na.omit(sapply(strsplit(tot_covars, "\\."), "[", 1)[!duplicated(sapply(strsplit(tot_covars, "\\."), "[", 1))]) + tot_covars <- na.omit(sapply(strsplit(tot_covars, "\\."), "[", 1)[ + !duplicated(sapply(strsplit(tot_covars, "\\."), "[", 1))]) imbalanced_covars <- sum(bal_summary_exp$imbalanced_n, na.rm = TRUE) total_covars <- sum(bal_summary_exp$n, na.rm = TRUE) @@ -601,23 +589,14 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (verbose){ if (data_type == "imputed"){ - # cat(paste0("USER ALERT: For exposure ", exposure, " imputation ", k, " using ", weights_method, " and ", form_name, " formulas: "), "\n") cat(sprintf("USER ALERT: For exposure %s imputation %s using %s and %s formulas: \n", exposure, k, weights_method, form_name)) - # cat(paste0("The median absolute value relation between exposure and confounder is ", 
round(median(abs(all_bal_stats$std_bal_stats)), 2), " (range = ", - # round(min(all_bal_stats$std_bal_stats), 2), "-", round(max(all_bal_stats$std_bal_stats), 2), ")."), "\n") cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s - %s).\n", round(median(abs(all_bal_stats$std_bal_stats)), 2), round(min(all_bal_stats$std_bal_stats), 2), round(max(all_bal_stats$std_bal_stats), 2))) - # cat(paste0("As shown below, ", imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, - # "%) covariates across time points, corresponding to ", - # remaining_imbalanced_domains, " out of ", total_domains, - # " domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of ", - # remaining_avg_abs_corr, " (range= ", remaining_corr_range, "):"), "\n") - cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %sout of %s domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of %s (range= %s):\n", imbalanced_covars, @@ -629,21 +608,14 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ remaining_corr_range)) cat("\n") - cat(knitr::kable(bal_summary_exp, caption = - # paste0("Imbalanced Covariates for imputation ", k, " using ", - # weights_method, " and ", form_name, " formulas"), - sprintf("Imbalanced Covariates for imputation %s using %s and %s formulas", - k, weights_method, form_name), + cat(knitr::kable(bal_summary_exp, + caption = sprintf("Imbalanced Covariates for imputation %s using %s and %s formulas", + k, weights_method, form_name), format = 'pipe'), sep = "\n") cat("\n") cat("\n") } else { - # cat(paste0("As shown below, for exposure ", exposure, " using ", weights_method, ", and ", form_name, " formulas, ", - # imbalanced_covars, " out of ", total_covars, " (", percentage_imbalanced, "%) covariates across time points corresponding to ", - # remaining_imbalanced_domains, " out of ", total_domains, - # " domains remain imbalanced with a remaining average absolute value correlation/std mean difference of ", - # remaining_avg_abs_corr, " (range= ", remaining_corr_range, ") :"), "\n") cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %sout of %s domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of %s (range= %s):\n", @@ -656,10 +628,8 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ remaining_corr_range)) cat("\n") - cat(knitr::kable(bal_summary_exp, caption = - # paste0("Imbalanced covariates using ", - # weights_method, " and ", form_name, " formulas"), - sprintf("Imbalanced covariates using %s and %s formulas", weights_method, form_name), + cat(knitr::kable(bal_summary_exp, + caption = sprintf("Imbalanced covariates using %s and %s formulas", weights_method, form_name), format = 'pipe'), sep = "\n") cat("\n") cat("\n") @@ -670,3 +640,4 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ all_bal_stats } +f diff --git a/R/compareHelpers.R b/R/compareHelpers.R index 1779fdea..2b0fd76c 100644 --- a/R/compareHelpers.R +++ b/R/compareHelpers.R @@ -127,7 +127,7 @@ create_custom_comparisons <- function(preds, ref_vals, comp_vals, exposure) { add_histories <- function(p, d) { - if((is.list(p)) & length(p) == 1){ + if((is.list(p)) && length(p) == 1){ history <- matrix(data = NA, nrow = nrow(p[[1]]), ncol = 1) # Get histories from the 
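The balance summary that calcBalStats() prints above reduces to flagging standardized statistics against a threshold and reporting their median and range. A compact sketch on made-up balance statistics:

bal <- data.frame(covariate     = c("covA.6", "covB.6", "covC.15"),
                  std_bal_stats = c(0.02, -0.12, 0.07))
bal$bal_thresh <- 0.1
bal$balanced   <- ifelse(abs(bal$std_bal_stats) < bal$bal_thresh, 1, 0)
cat(sprintf("The median absolute value relation between exposure and confounder is %s (range = %s-%s).\n",
            round(median(abs(bal$std_bal_stats)), 2),
            round(min(bal$std_bal_stats), 2),
            round(max(bal$std_bal_stats), 2)))
cat(knitr::kable(bal, caption = "Imbalanced covariates (illustrative)", format = "pipe"), sep = "\n")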
first element p <- p[[1]] } diff --git a/R/compareHistories.R b/R/compareHistories.R index 3767d4fe..1257f6ab 100644 --- a/R/compareHistories.R +++ b/R/compareHistories.R @@ -117,14 +117,14 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } - else if(!is.character(exposure) | length(exposure) != 1){ + else if(!is.character(exposure) || length(exposure) != 1){ stop("Please supply a single exposure as a character.", call. = FALSE) } if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } - else if(!is.character(outcome) | length(outcome) != 1){ + else if(!is.character(outcome) || length(outcome) != 1){ stop("Please supply a single outcome as a character.", call. = FALSE) } @@ -169,8 +169,8 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod } } - exposure_type <- ifelse(inherits(model[[1]]$data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric"), - "continuous", "binary") + exposure_type <- if(inherits(model[[1]]$data[, paste0(exposure, '.', exposure_time_pts[1])], "numeric")) + "continuous" else "binary" ints <- gsub(" ", "", as.character(unlist(strsplit(as.character(unlist(model[[1]]$terms)), "\\+")))) @@ -191,7 +191,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod data <- lapply(model, function(x) { x$data }) data <- do.call("rbind", data) } - if(!is.null(names(model))){ #single df (not imputed) + else if(!is.null(names(model))){ #single df (not imputed) data <- model[[1]]$data } @@ -201,7 +201,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod } else{ - if( !is.data.frame(epochs) | ncol(epochs) != 2 | sum(colnames(epochs) == c("epochs", "values")) != ncol(epochs)){ + if( !is.data.frame(epochs) || ncol(epochs) != 2 || sum(colnames(epochs) == c("epochs", "values")) != ncol(epochs)){ stop("If you supply epochs, please provide a dataframe with two columns of epochs and values.", call. 
= FALSE) } @@ -358,7 +358,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod } #conduct all pairwise comparisons if no ref/comparisons were specified by user - if (is.na(reference) & is.null(comp_histories)){ + if (is.na(reference) && is.null(comp_histories)){ # Pairwise comparisons; don't need to use custom class comps <- lapply(preds, function(y){ y |> marginaleffects::hypotheses("pairwise") @@ -390,10 +390,10 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod preds_pool <- add_dose(preds_pool, dose_level) # If the user specified reference and comparison groups, subset pred_pool for inspection and plotting - if (!is.na(reference) & !is.null(comp_histories)) { + if (!is.na(reference) && !is.null(comp_histories)) { # preds_pool <- preds_pool %>% # dplyr::filter(history %in% c(reference, comp_histories)) - preds_pool <- preds_pool[preds_pool$history %in% c(reference, comp_histories), ] + preds_pool <- preds_pool[preds_pool$history %in% c(reference, comp_histories), , drop = FALSE ] } @@ -401,9 +401,11 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod if(is.null(hi_lo_cut)){ hi_lo_cut <- "median+-.1" } + # Makes table of pooled average estimates and saves out - sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_estimated_means_hi_lo=", - paste(hi_lo_cut, collapse = "_"), ".html")) + outfile <- sprintf("%s/histories/%s-%s_pooled_estimated_means_hi_lo=%s.html", + home_dir, exposure, outcome, paste(hi_lo_cut, collapse = "_") ) + sink(outfile) stargazer::stargazer( as.data.frame(preds_pool), type = "html", @@ -412,9 +414,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_estimated_means_hi_lo=", - hi_lo_cut, ".html" - ) + out = outfile ) sink() } @@ -422,7 +422,10 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod if (verbose){ cat("\n") cat("Below are the pooled average predictions by user-specified history:") # - cat(knitr::kable(preds_pool, format = 'pipe', digits = 4), sep = "\n") + cat(knitr::kable(preds_pool, + format = 'pipe', + digits = 4), + sep = "\n") cat("\n") } @@ -458,9 +461,11 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod if(is.null(hi_lo_cut)){ hi_lo_cut <- "median+-.1" } + # Makes table of pooled comparisons and saves out - sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_comparisons_hi_lo=", - paste(hi_lo_cut, collapse = "_"), ".html")) + outfile <- sprintf("%s/histories/%s-%s_pooled_comparisons_hi_lo=%s.html", + home_dir, exposure, outcome, paste(hi_lo_cut, collapse = "_") ) + sink(outfile) stargazer::stargazer( as.data.frame(comps_pool), type = "html", @@ -469,15 +474,17 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_pooled_comparisons_hi_lo=", - paste(hi_lo_cut, collapse = "_"), ".html") + out = outfile ) sink() } if (verbose){ cat("\n") cat(paste0("USER ALERT: please inspect the following pooled comparisons :"), "\n") - cat(knitr::kable(comps_pool, format = 'pipe', digits = 4), sep = "\n") + cat(knitr::kable(comps_pool, + format = 'pipe', + digits = 4), + sep = "\n") cat("\n") cat("\n") } @@ -496,10 +503,10 @@ compareHistories <- function(home_dir, exposure, 
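The estimate-then-compare pattern in compareHistories() above (average predictions per history, then pairwise contrasts) can be sketched with a stand-in lm() fit; mtcars is only a placeholder for the weighted marginal model:

library(marginaleffects)
mod   <- lm(mpg ~ factor(cyl) + hp, data = mtcars)
preds <- avg_predictions(mod, by = "cyl")          # average prediction per group ("history")
comps <- preds |> hypotheses("pairwise")           # all pairwise contrasts, as above
cat(knitr::kable(as.data.frame(comps), format = "pipe", digits = 4), sep = "\n")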
exposure_time_pts, outcome, mod preds <- add_dose(preds, dose_level) # If the user specified reference and comparison groups, subset preds for inspection and plotting - if (!is.na(reference) & !is.null(comp_histories)) { + if (!is.na(reference) && !is.null(comp_histories)) { # preds <- preds %>% # dplyr::filter(history %in% c(reference, comp_histories)) - preds <- preds[preds$history %in% c(reference, comp_histories), ] + preds <- preds[preds$history %in% c(reference, comp_histories), , drop = FALSE ] } @@ -507,10 +514,15 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod if(is.null(hi_lo_cut)){ hi_lo_cut <- "median+-.1" } + # Makes table of average estimates a lapply(seq_len(length(preds)), function(x) { y <- preds[[x]] - sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_estimated_means_hi_lo=", paste(hi_lo_cut, collapse = "_"), ".html")) + + outfile <- sprintf("%s/histories/%s-%s_estimated_means_hi_lo=%s.html", + home_dir, exposure, outcome, paste(hi_lo_cut, collapse = "_") ) + + sink(outfile) stargazer::stargazer( as.data.frame(y), type = "html", @@ -519,17 +531,20 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_estimated_means_hi_lo=", paste(hi_lo_cut, collapse = "_"), ".html" - ) + out = outfile ) sink() + }) } if (verbose){ cat("\n") cat("Below are the average predictions by user-specified history:", "\n") # Not sure if we need to print this? - cat(knitr::kable(preds, format = 'pipe', digits = 4), sep = "\n") + cat(knitr::kable(preds, + format = 'pipe', + digits = 4), + sep = "\n") cat("\n") } @@ -561,9 +576,12 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod if(is.null(hi_lo_cut)){ hi_lo_cut <- "median+-.1" } + + outfile <- sprintf("%s/histories/%s-%s_comparisons_hi_lo=%s.html", + home_dir, exposure, outcome, paste(hi_lo_cut, collapse = "_") ) + # Makes table of comparisons and saves out - sink(paste0(home_dir, "/histories/", exposure, "-", outcome, "_comparisons_hi_lo=", - paste(hi_lo_cut, collapse = "_"), ".html")) + sink(outfile) stargazer::stargazer( as.data.frame(comps), type = "html", @@ -572,17 +590,19 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod summary = FALSE, rownames = FALSE, header = FALSE, - out = paste0(home_dir, "/histories/", exposure, "-", outcome, "_comparisons_hi_lo=", - hi_lo_cut, ".html" - ) + out = outfile ) sink() + } if (verbose) { cat("\n") cat(paste0("USER ALERT: please inspect the following comparisons:"), "\n") - cat(knitr::kable(comps, format = 'pipe', digits = 2), sep = "\n") + cat(knitr::kable(comps, + format = 'pipe', + digits = 2), + sep = "\n") cat("\n") cat("\n") } @@ -595,9 +615,13 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod if(!inherits(colors, "character")){ stop("Please provide a character string of a Brewer palette name or list of colors for plotting.", call. = FALSE) } - else if (length(colors) > 1 & length(colors) != nrow(epochs) + 1) { - stop(paste0('Please provide either: ', nrow(epochs) + 1, - ' different colors, a color palette, or leave this entry blank.'), call. 
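For the HTML tables written above, stargazer() with summary = FALSE renders a data frame as-is, and out = writes the file directly. A sketch with illustrative rows and a temporary path in place of the home_dir layout:

library(stargazer)
preds_pool <- data.frame(history   = c("l-l-l", "h-h-h"),
                         estimate  = c(10.2, 12.8),
                         std.error = c(0.4, 0.5))
outfile <- file.path(tempdir(), "exposure-outcome_pooled_estimated_means.html")
stargazer(as.data.frame(preds_pool), type = "html", digits = 2,
          summary = FALSE, rownames = FALSE, header = FALSE, out = outfile)
file.exists(outfile)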
= FALSE) + else if (length(colors) > 1 && length(colors) != nrow(epochs) + 1) { + stop( + # paste0('Please provide either: ', nrow(epochs) + 1, + # ' different colors, a color palette, or leave this entry blank.'), + sprint("Please provide either %s different colors, a Brewer color palette, or leave this entry blank. \n", + nrow(epochs) + 1), + call. = FALSE) } if (is.na(exp_lab)){ @@ -615,7 +639,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod # comparisons <- comparisons %>% # dplyr::arrange(dose) # Order by dose - comparisons <- comparisons[order(comparisons$dose), ] + comparisons <- comparisons[order(comparisons$dose), , drop = FALSE] if (length(colors) > 1) { # If user input a list of colors p <- ggplot2::ggplot(data = comparisons, ggplot2::aes(x = estimate, y = history, color = dose)) + @@ -632,7 +656,11 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod panel.background = ggplot2::element_blank(), axis.line = ggplot2::element_line(colour = "black")) if (save.out){ - ggplot2::ggsave(paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), plot = p) + ggplot2::ggsave( + # paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), + sprintf("%s/plots/%s-%s.jpeg", + home_dir, exposure, outcome), + plot = p) } if(verbose){ print(p) @@ -654,14 +682,18 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod ggplot2::guides(fill = ggplot2::guide_legend(title="Dosage")) if (save.out){ - ggplot2::ggsave(paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), plot = p) + ggplot2::ggsave( + # paste0(home_dir, "/plots/", exposure, "-", outcome, ".jpeg"), + sprintf("%s/plots/%s-%s.jpeg", + home_dir, exposure, outcome), + plot = p) } if(verbose){ print(p) } } - if (save.out & verbose){ + if (save.out && verbose){ cat("\n") cat("See the '/plots/' folder for graphical representations of results.") } diff --git a/R/createFormulas.R b/R/createFormulas.R index 8fb1bef2..2befcc7e 100644 --- a/R/createFormulas.R +++ b/R/createFormulas.R @@ -106,14 +106,14 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } - else if(!is.character(exposure) | length(exposure) != 1){ + else if(!is.character(exposure) || length(exposure) != 1){ stop("Please supply a single exposure as a character.", call. = FALSE) } if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } - else if(!is.character(outcome) | length(outcome) != 1){ + else if(!is.character(outcome) || length(outcome) != 1){ stop("Please supply a single outcome as a character.", call. = FALSE) } @@ -143,15 +143,15 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, else if(!is.character(type)){ stop("Please provide a character string type from the following list: 'short', 'full', or 'update'", call. = FALSE) } - else if(! type %in% c("short", "full", "update") | length(type) != 1){ + else if(! type %in% c("short", "full", "update") || length(type) != 1){ stop("Please provide a single type from the following list: 'short', 'full', or 'update'", call. = FALSE) } - if (type != "update" & !is.null(bal_stats)){ + if (type != "update" && !is.null(bal_stats)){ stop ("Please only provide balance statistics for the type 'update'.", call. 
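The colors check above uses the formatted-message idiom that recurs throughout this patch; base R's formatter is sprintf(), and call. = FALSE keeps the internal call out of the user-facing error. A sketch with a hypothetical palette size:

n_needed <- 4                       # illustrative stand-in for nrow(epochs) + 1
colors   <- c("red", "blue")
try(
  if (length(colors) > 1 && length(colors) != n_needed) {
    stop(sprintf("Please provide either %s different colors, a Brewer color palette, or leave this entry blank.",
                 n_needed),
         call. = FALSE)
  }
)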
= FALSE) } - if(!is.null(bal_stats) & !is.data.frame(bal_stats)){ + if(!is.null(bal_stats) && !is.data.frame(bal_stats)){ stop("Please provide a data frame of balance statistics from the assessBalance function.", call. = FALSE) } @@ -174,14 +174,14 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, all_covars <- c(tv_confounders, ti_confounders) if (!is.null(custom)){ - if (length(custom) != length(exposure_time_pts) | !inherits(custom, "list")){ + if (length(custom) != length(exposure_time_pts) || !inherits(custom, "list")){ stop("If you wish to supply custom formulas, please provide a list of formulas for each exposure time point.", call. = FALSE) } forms <- custom } else{ - if (type != "update" & !is.null(bal_stats)){ + if (type != "update" && !is.null(bal_stats)){ stop ("Please only provide balance statistics for the type 'update'.", call. = FALSE) } @@ -263,9 +263,9 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, time_var_include <- c(time_var_include, new) if (verbose){ - # cat(paste0("For ", exposure, " at exposure time point ", time , - # ", the following covariate(s) will be added to the short balancing formula: "), paste(new, collapse = ", "), "\n") - cat(sprintf("For %s at exposure time point %s the following covariate(s) will be added to the short balancing formula: %s \n", + + cat(sprintf("For %s at exposure time point %s the following covariate(s) will be added to the short balancing formula: + %s \n", exposure, time, paste(new, collapse = ", "))) cat("\n") @@ -273,8 +273,7 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, } else{ if (verbose) { - # cat(paste0("For ", exposure, " at exposure time point ", time , - # " no time-varying confounders at additional lags were added."), "\n") + cat(sprintf("For %s at exposure time point %s no time-varying confounders at additional lags were added. \n", exposure, time )) cat("\n") @@ -345,15 +344,12 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, if(save.out){ # Writes forms_csv to a CSV file - forms_csv_file <- - # paste0(forms_dir, "/", type, "_", exposure, "-", outcome, "_", type, "_balancing_formulas.csv") - sprintf("%s/%s_%s-%s_%s_balancing_formulas.csv", + forms_csv_file <- sprintf("%s/%s_%s-%s_%s_balancing_formulas.csv", forms_dir, type, exposure, outcome, type) writeLines(forms_csv, con = forms_csv_file) # writes to rds - # forms_rds_file <- paste0(forms_dir, "/", type, "_", exposure, "-", outcome, "_", type, "_balancing_formulas.rds") forms_rds_file <- sprintf("%s/%s_%s-%s_%s_balancing_formulas.rds", forms_dir, type, exposure, outcome, type) diff --git a/R/createWeights.R b/R/createWeights.R index d2f549a9..063d3c51 100644 --- a/R/createWeights.R +++ b/R/createWeights.R @@ -79,7 +79,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } - else if (!mice::is.mids(data) & !is.data.frame(data) & !inherits(data, "list")) { + else if (!mice::is.mids(data) && !is.data.frame(data) && !inherits(data, "list")) { stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", call. 
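The saved-formula paths above are built from a sprintf() template; the same convention, sketched against a temporary directory and hypothetical names rather than home_dir:

forms_dir <- file.path(tempdir(), "formulas")
dir.create(forms_dir, recursive = TRUE, showWarnings = FALSE)
type <- "short"; exposure <- "exposureA"; outcome <- "outcomeB.5"
forms_rds_file <- sprintf("%s/%s_%s-%s_%s_balancing_formulas.rds",
                          forms_dir, type, exposure, outcome, type)
saveRDS(list(), forms_rds_file)
file.exists(forms_rds_file)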
= FALSE) } @@ -87,14 +87,14 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } - else if(!is.character(exposure) | length(exposure) != 1){ + else if(!is.character(exposure) || length(exposure) != 1){ stop("Please supply a single exposure as a character.", call. = FALSE) } if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } - else if(!is.character(outcome) | length(outcome) != 1){ + else if(!is.character(outcome) || length(outcome) != 1){ stop("Please supply a single outcome as a character.", call. = FALSE) } @@ -149,9 +149,12 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } if (read_in_from_file == "yes") { + tryCatch({ - weights <- readRDS(paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_", - weights_method, "_fit.rds")) + # weights <- readRDS(paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_", + # weights_method, "_fit.rds")) + weights <- readRDS(sprintf("%s/weights/%s-%s_%s_%s_fit.rds", + home_dir, exposure, outcome, form_name, weights_method)) if (verbose){ message("Reading in balancing weights from the local folder.") @@ -173,24 +176,24 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = if(weights_method == "super"){ fit <- WeightIt::weightitMSM(form, - data = data, - method = weights_method, - stabilize = TRUE, - density = "dt_2", #do we want this? - use.kernel = TRUE, - include.obj = TRUE, - SL.library = SL.library, - over = FALSE) + data = data, + method = weights_method, + stabilize = TRUE, + density = "dt_2", #do we want this? + use.kernel = TRUE, + include.obj = TRUE, + SL.library = SL.library, + over = FALSE) } else{ fit <- WeightIt::weightitMSM(form, - data = data, - method = weights_method, - stabilize = TRUE, - density = "dt_2", #do we want this? - use.kernel = TRUE, - include.obj = TRUE, - over = FALSE) + data = data, + method = weights_method, + stabilize = TRUE, + density = "dt_2", #do we want this? + use.kernel = TRUE, + include.obj = TRUE, + over = FALSE) } fit } @@ -214,31 +217,48 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = d$weights <- fit$weights if (verbose){ - cat(paste0("For imputation ", i, " and the ", weights_method, " weighting method, the median weight value is ", - round(median(fit$weights), 2) ," (SD= ", round(sd(fit$weights), 2), "; range= ", round(min(fit$weights), 2), "-", - round(max(fit$weights), 2), ")."), "\n") + + cat(sprintf("For imputation %s and the %s weighting method, the median weight value is %s + (SD= %s; range= %s-%s ). 
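The weightitMSM() call at the center of createWeights() can be sketched on simulated two-wave data; all variable names are made up, and method = "glm" stands in for whichever weights_method the user selects:

library(WeightIt)
set.seed(1234)
d <- data.frame(C0 = rnorm(200))
d$A1 <- 0.5 * d$C0 + rnorm(200)
d$C1 <- 0.3 * d$A1 + rnorm(200)
d$A2 <- 0.4 * d$C1 + 0.3 * d$A1 + rnorm(200)

fit <- weightitMSM(list(A1 ~ C0,
                        A2 ~ C1 + A1 + C0),
                   data = d,
                   method = "glm",
                   stabilize = TRUE,
                   include.obj = TRUE)
round(summary(fit$weights), 2)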
\n", + i, + weights_method, + round(median(fit$weights), 2), + round(sd(fit$weights), 2), + round(min(fit$weights), 2), + round(max(fit$weights), 2))) + cat('\n') } if(save.out){ # Save weights merged with ID variable - write.csv(x = d, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", form_name, - "_", weights_method, "_", i, ".csv")) + write.csv(x = d, file = + # paste0(home_dir, "/weights/values/", exposure, "-", outcome, "_", form_name, + # "_", weights_method, "_", i, ".csv")) + sprintf("%s/weights/values/%s-%s_%s_%s_%s.csv", + home_dir, exposure, outcome, form_name, weights_method, i)) } # Writes image of the histogram of weights to assess heavy tails p <- ggplot2::ggplot(data = as.data.frame(fit$weight), ggplot2::aes(x = fit$weight)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + - ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) + ggplot2::ggtitle( + # paste0("Distribution of ", weights_method, " weights")) + sprintf("Distribution of %s weights", + weights_method)) if(verbose){ print(p) } if(save.out){ - ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, "_", form_name, - "_", weights_method, "_", i, ".png"), plot = p, height = 8, width = 14) + ggsave( + # paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, "_", form_name, + # "_", weights_method, "_", i, ".png"), + sprintf("%s/weights/histograms/Hist_%s-%s_%s_%s_%s.png", + home_dir, exposure, outcome, form_name, weights_method, i), + plot = p, height = 8, width = 14) } fit @@ -269,17 +289,29 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = d$weights <- fit$weights if (verbose){ - cat(paste0("For imputation", i, " and ", weights_method, - ", weighting method, the median weight value is ", round(median(fit$weights), 2) , - " (SD= ", round(sd(fit$weights), 2), "; range= ", round(min(fit$weights), 2), "-", - round(max(fit$weights), 2), ")."), "\n") + # cat(paste0("For imputation", i, " and ", weights_method, + # ", weighting method, the median weight value is ", round(median(fit$weights), 2) , + # " (SD= ", round(sd(fit$weights), 2), "; range= ", round(min(fit$weights), 2), "-", + # round(max(fit$weights), 2), ")."), "\n") + cat(sprintf("For imputation %s and the %s weighting method, the median weight value is %s + (SD= %s; range= %s-%s ). 
\n", + i, + weights_method, + round(median(fit$weights), 2), + round(sd(fit$weights), 2), + round(min(fit$weights), 2), + round(max(fit$weights), 2))) cat('\n') } if(save.out){ # Save weights merged with ID variable - write.csv(x = d, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, - "_", form_name, "_", weights_method, "_", i, ".csv")) + write.csv(x = d, + file = + # paste0(home_dir, "/weights/values/", exposure, "-", outcome, + # "_", form_name, "_", weights_method, "_", i, ".csv")) + sprintf("%s/weights/values/%s-%s_%s_%s_%s.csv", + home_dir, exposure, outcome, form_name, weights_method, i)) } # Writes image of the histogram of weights to assess heavy tails @@ -293,9 +325,12 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } if(save.out){ - ggplot2::ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, - "_", form_name, "_", weights_method, "_", i, ".png"), plot = p, - height = 8, width = 14) + ggplot2::ggsave( + # paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, + # "_", form_name, "_", weights_method, "_", i, ".png"), + sprintf("%s/weights/histograms/Hist_%s-%s_%s_%s_%s.png", + home_dir, exposure, outcome, form_name, weights_method, i), + plot = p, height = 8, width = 14) } fit @@ -325,9 +360,17 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = data$weights <- weights[[1]]$weights if (verbose){ - cat(paste0("For the ", weights_method, " weighting method, the median weight value is ", - round(median(data$weights), 2) , " (SD = ", round(sd(data$weights), 2), "; range = ", - round(min(data$weights), 2), "-", round(max(data$weights), 2), ")."), "\n") + # cat(paste0("For the ", weights_method, " weighting method, the median weight value is ", + # round(median(data$weights), 2) , " (SD = ", round(sd(data$weights), 2), "; range = ", + # round(min(data$weights), 2), "-", round(max(data$weights), 2), ")."), "\n") + cat(sprintf("For the %s weighting method, the median weight value is %s + (SD = %s; range = %s-%s. 
\n", + weights_method, + round(median(data$weights), 2), + round(sd(data$weights), 2), + round(min(data$weights), 2), + round(max(data$weights)))) + cat('\n') } @@ -335,23 +378,34 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = if(save.out){ # Save weights merged with ID variable - write.csv(x = data, file = paste0(home_dir, "/weights/values/", exposure, "-", outcome, - "_", form_name, "_", weights_method, ".csv")) + write.csv(x = data, file = + # paste0(home_dir, "/weights/values/", exposure, "-", outcome, + # "_", form_name, "_", weights_method, ".csv")) + sprintf("%s/weights/values/%s-%s_%s_%s.csv", + home_dir, exposure, outcome, form_name, weights_method)) } # Writes image of the histogram of weights to assess heavy tails p <- ggplot2::ggplot(data = as.data.frame(weights[[1]]$weights), ggplot2::aes(x = weights[[1]]$weights)) + ggplot2::geom_histogram(color = 'black', bins = 15) + ggplot2::xlab("Weights") + - ggplot2::ggtitle(paste0("Distribution of ", weights_method, " weights")) + ggplot2::ggtitle( + # paste0("Distribution of ", weights_method, " weights")) + sprintf("Distribution of %s weights", + weights_method)) if(verbose){ print(p) } if(save.out){ - ggplot2::ggsave(paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, - "_", form_name, "_", weights_method, ".png"), plot = p, height = 8, width = 14) + ggplot2::ggsave( + # paste0(home_dir, "/weights/histograms/", "Hist_", exposure, "-", outcome, + # "_", form_name, "_", weights_method, ".png"), + sprintf("%s/weights/histograms/Hist_%s-%s_%s_%s.png", + home_dir, exposure, outcome, form_name, weights_method), + plot = p, height = 8, width = 14) + if (verbose){ cat("Weights have now been saved into the 'weights/values/' folder.") cat("\n") @@ -361,10 +415,14 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } if(save.out){ - saveRDS(weights, file = paste0(home_dir, "/weights/", exposure, "-", outcome, - "_", form_name, "_", weights_method, "_fit.rds")) + saveRDS(weights, file = + # paste0(home_dir, "/weights/", exposure, "-", outcome, + # "_", form_name, "_", weights_method, "_fit.rds")) + sprintf("%s/weights/%s-%s_%s_%s_fit.rds", + home_dir, exposure, outcome, form_name, weights_method)) if (verbose){ - cat("Weights models have been saved as an .rds object in the 'weights' folder.") + cat("\n") + cat("Weights models have been saved as an .rds object in the 'weights' folder.", "\n") } } diff --git a/R/eval_hist.R b/R/eval_hist.R index 55fa3f8a..4db8e12d 100644 --- a/R/eval_hist.R +++ b/R/eval_hist.R @@ -65,7 +65,6 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, data_wide <- data - #new will always have exposure main effects (ether exposure time points or epochs) # Lists out exposure-epoch combos if( is.null(epochs)){ #making epochs time pts if not specified by user @@ -108,7 +107,7 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, paste, sep = "", collapse = "-") # Assigning history (e.g., h-h-h) based on user-specified hi/lo cutoffs - if( !is.na(ref) & !is.null(comps)){ + if( !is.na(ref) && !is.null(comps)){ tot_hist <- tot_hist[tot_hist %in% c(ref, comps)] } @@ -204,7 +203,7 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, his_summ <- aggregate( ID ~ history, data = new, FUN = function(x) n = length(x)) - if( !is.na(ref) & !is.null(comps)){ + if( !is.na(ref) && !is.null(comps)){ his_sum <- his_summ[his_summ$history %in% c(ref, comps), ] } @@ 
-255,23 +254,17 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, sufficient spread to avoid extrapolation and low precision:", "\n") if (nrow(his_summ) != length(tot_hist)) { - # cat(paste0("USER ALERT: There are no individuals in your sample that fall into ", - # paste(tot_hist[!tot_hist %in% his_summ$history], collapse = " & "), - # " exposure history/histories. You may wish to consider different high/low cutoffs (for continuous exposures), alternative epochs, or choose a different measure to avoid extrapolation."), "\n") - cat(sprintf("USER ALERT: There are no individuals in your sample that fall into %s exposure history/histories. + + warning(sprintf("USER ALERT: There are no individuals in your sample that fall into %s exposure history/histories. You may wish to consider different high/low cutoffs (for continuous exposures), alternative epochs, or choose a different measure to avoid extrapolation.\n", - paste(tot_hist[!tot_hist %in% his_summ$history], collapse = " & "))) + paste(tot_hist[!tot_hist %in% his_summ$history], collapse = " & ")), call. = FALSE) cat("\n") } cat("\n") - cat(knitr::kable(his_summ, caption = - # paste0("Summary of user-specified exposure ", exposure, - # " histories based on exposure main effects ", paste(epochs$epochs, collapse = ", "), - # " containing time points ", paste(epochs$values, collapse = ", "), ":"), - sprintf("Summary of user-specified exposure %s histories based on exposure main effects %s + cat(knitr::kable(his_summ, caption = sprintf("Summary of user-specified exposure %s histories based on exposure main effects %s containing time points %s:", - exposure, paste(epochs$epochs, collapse = ", "), paste(epochs$values, collapse = ", ")), + exposure, paste(epochs$epochs, collapse = ", "), paste(epochs$values, collapse = ", ")), format = 'pipe', row.names = F), sep = "\n") } diff --git a/R/fitModel.R b/R/fitModel.R index bfbebf51..dcdeb69c 100644 --- a/R/fitModel.R +++ b/R/fitModel.R @@ -122,21 +122,21 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } - else if (!mice::is.mids(data) & !is.data.frame(data) & !inherits(data, "list")) { + else if (!mice::is.mids(data) && !is.data.frame(data) && !inherits(data, "list")) { stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", call. = FALSE) } if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } - else if(!is.character(exposure) | length(exposure) != 1){ + else if(!is.character(exposure) || length(exposure) != 1){ stop("Please supply a single exposure as a character.", call. = FALSE) } if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } - else if(!is.character(outcome) | length(outcome) != 1){ + else if(!is.character(outcome) || length(outcome) != 1){ stop("Please supply a single outcome as a character.", call. = FALSE) } @@ -157,16 +157,16 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if (missing(model)){ stop('Please provide an outcome model selection "m" from 0-3 (e.g., "m1")', call. = FALSE) } - else if (!is.character(model) | length(model) != 1){ + else if (!is.character(model) || length(model) != 1){ stop('Please provide a single outcome model selection "m" from 0-3 (e.g., "m1")', call. 
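Switching the eval_hist() alert from cat() to warning() above routes it through R's condition system, so it can be suppressed or escalated with options(warn = ), and call. = FALSE drops the internal call from the message. A sketch with hypothetical history labels:

missing_hist <- c("l-l-h", "h-l-h")   # hypothetical unpopulated histories
if (length(missing_hist) > 0) {
  warning(sprintf("There are no individuals in your sample that fall into %s exposure history/histories.",
                  paste(missing_hist, collapse = " & ")),
          call. = FALSE)
}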
= FALSE) } if (!(model %in% c("m0", "m1", "m2", "m3"))) { stop('Please provide a valid model "m" from 0-3 (e.g., "m1")', call. = FALSE) } - if ((model == "m2" | model == "m3") & (is.na(int_order) | !is.numeric(int_order))){ + if ((model == "m2" | model == "m3") && (is.na(int_order) || !is.numeric(int_order))){ stop("Please provide an integer interaction order if you select a model with interactions.", call. = FALSE) } - if ((model == "m1" | model == "m3") & (is.null(covariates) | !is.character(covariates))){ + if ((model == "m1" | model == "m3") && (is.null(covariates) || !is.character(covariates))){ stop("Please provide a list of covariates as characters if you select a covariate model.", call. = FALSE) } @@ -226,7 +226,7 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco } else{ - if( !is.data.frame(epochs) | ncol(epochs) != 2 | sum(colnames(epochs) == c("epochs", "values")) != ncol(epochs)){ + if( !is.data.frame(epochs) || ncol(epochs) != 2 || sum(colnames(epochs) == c("epochs", "values")) != ncol(epochs)){ stop("If you supply epochs, please provide a dataframe with two columns of epochs and values.", call. = FALSE) } @@ -235,10 +235,8 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco } } - exp_epochs <- apply(expand.grid(exposure, as.character(epochs[, 1])), 1, paste, sep = "", collapse = ".") - #getting null comparison if (model == "m0") {n <- "int"} else if (model == "m1") {n <- "covs"} @@ -350,7 +348,7 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco - if (mice::is.mids(data) | inherits(data, "list")){ + if (mice::is.mids(data) || inherits(data, "list")){ names(fits) <- seq_len(length(fits)) @@ -373,7 +371,10 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), model.names = c(paste0("Imp.", seq_len(length(fits)))), - file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")) + file.name = file.path(home_dir, "models", + # paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")) + sprintf("%s-%s_%s_table_mod_ev.docx", + exposure, outcome, model)) )) } @@ -383,19 +384,23 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if (verbose){ require(survey) cat(paste0("The marginal model, ", model, ", is summarized below:"), "\n") - print(suppressWarnings(jtools::export_summs( - fits, - statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared")))) + print(suppressWarnings( + jtools::export_summs( + fits, + statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared")))) } if (save.out){ # requireNamespace(officer) #is there another way to do this? 
required for writing to word # requireNamespace(flextable) # " " - suppressWarnings(jtools::export_summs(fits, - to.file = "docx", - statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), - file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, - "_table_mod_ev.docx")))) + suppressWarnings( + jtools::export_summs(fits, + to.file = "docx", + statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"), + file.name = file.path(home_dir, "models", + # paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx")))) + sprintf("%s-%s_%s_table_mod_ev.docx", + exposure, outcome, model)))) } } @@ -403,7 +408,10 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if(save.out){ saveRDS(fits, - file = file.path(home_dir, "/models/", paste0(exposure, "-", outcome, "_", model, "_model.rds"))) + file = file.path(home_dir, "/models/", + # paste0(exposure, "-", outcome, "_", model, "_model.rds"))) + sprintf("%s-%s_%s_model.rds", + exposure, outcome, model))) cat("\n") if (verbose){ diff --git a/R/getModel.R b/R/getModel.R index 31ba5656..1fa1cee3 100644 --- a/R/getModel.R +++ b/R/getModel.R @@ -126,6 +126,7 @@ getModel <- function(d, exposure, exposure_time_pts, outcome, epochs, exp_epochs }), collapse = " + " ) + #create interactions in data for (x in seq_len(length(unlist(strsplit(interactions, "\\+"))))) { name <- gsub(" ", "", unlist(strsplit(interactions, "\\+"))[x]) diff --git a/R/make_love_plot.R b/R/make_love_plot.R index b9491508..27c597fd 100644 --- a/R/make_love_plot.R +++ b/R/make_love_plot.R @@ -1,6 +1,6 @@ #' Create love plots showing balancing statistics #' -#' @param home_dir path to home directory +#' @param home_dir path to home directory (required if save.out = TRUE) #' @param folder folder path for saving #' @param exposure name of exposure variable #' @param exposure_time_pt exposure time point integer @@ -92,7 +92,7 @@ make_love_plot <- function(home_dir, folder, exposure, exposure_time_pt, exposur x_lab <- if(exposure_type == "continuous") "Correlation with Exposure" else "Standardized Mean Difference Between Exposures" - labels <- if(sum(balance_stats$balanced == 0) > 0) balance_stats$covariate else "" + labels <- ifelse(balance_stats$balanced == 0, balance_stats$covariate, "") min_val <- if(min(balance_stats[, "avg_bal"]) < 0) min(balance_stats[, "avg_bal"]) - 0.05 else min(balance_thresh) - 0.05 if (min_val > -(max(balance_thresh))){ diff --git a/R/trimWeights.R b/R/trimWeights.R index c30240f5..1b78a0c4 100644 --- a/R/trimWeights.R +++ b/R/trimWeights.R @@ -74,7 +74,7 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) } - else if(!is.character(exposure) | length(exposure) != 1){ + else if(!is.character(exposure) || length(exposure) != 1){ stop("Please supply a single exposure as a character.", call. = FALSE) } @@ -88,7 +88,7 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) } - else if(!is.character(outcome) | length(outcome) != 1){ + else if(!is.character(outcome) || length(outcome) != 1){ stop("Please supply a single outcome as a character.", call. 
= FALSE) } @@ -150,9 +150,8 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v # Save histogram of new weights p <- ggplot2::ggplot(as.data.frame(t), ggplot2::aes(x = t)) + ggplot2::geom_histogram(color = 'black', bins = 15) + - ggplot2::ggtitle( - # paste0("Weights trimmed at the ", quantile, "th value")) - sprintf("Weights trimmed at the %s th value", quantile)) + ggplot2::ggtitle(sprintf("Weights trimmed at the %s th value", + quantile)) if(verbose){ print(p) @@ -165,7 +164,8 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v exposure, outcome, weights[[x]]$method, quantile, x), path = # paste0(home_dir, "/weights/histograms/"), - sprintf("%s/weights/histograms/", home_dir), + sprintf("%s/weights/histograms/", + home_dir), plot = p, height = 8, width = 14) } diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 3519bcd9..27b9f3b2 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -47,7 +47,7 @@ library(devtools) install_github("istallworthy/devMSMs") library(devMSMs) -#get helper files from github +#get helper files from github. Note: you need to install devMSMs for inspectData to work. install_github("istallworthy/devMSMsHelpers") library(devMSMsHelpers) @@ -56,7 +56,7 @@ library(devMSMsHelpers) #prior to re-installing using the code above. Sorry, this is annoying! #conducting package checks & tests from documentation --just for IS to run; comment out when testing the workflow -devtools::check() +# devtools::check() ``` @@ -67,12 +67,16 @@ devtools::check() set.seed(1234) +#required if you wish to use save.out = TRUE in the functions home_dir <- '/Users/isabella/Library/CloudStorage/Box-Box/BSL General/MSMs/testing/isa' #no / after +#required exposure <- "ESETA1" +#required exposure_time_pts <- c(6, 15, 24, 35, 58) +#required outcome <- "EF_avg_perc.58" #these are now optional @@ -94,6 +98,7 @@ tv_confounders <- c("SAAmylase.6","SAAmylase.15", "SAAmylase.24", "StrDif_Tot.35", "StrDif_Tot.58", "EF_avg_perc.35", "EF_avg_perc.58") +#required ti_confounders <- c("state", "BioDadInHH2", "PmAge2", "PmBlac2", "TcBlac2", "PmMrSt2", "PmEd2", "KFASTScr", "RMomAgeU", "RHealth", "HomeOwnd", "SWghtLB", "SurpPreg", "SmokTotl", "DrnkFreq", "peri_health", "mat_health", "gov_asst") @@ -201,6 +206,7 @@ data$HomeOwnd = as.factor(data$HomeOwnd) library(dplyr) data <- data %>% dplyr::select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood", "pcx")))) #removing these --came from old dataset +data$ID = as.numeric(as.character(unlist(data$ID))) ``` @@ -652,7 +658,8 @@ models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome #minimum inputs model <- "m0" -models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, model, epochs = epochs) +models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, model, + epochs = epochs) #some optional inputs models <- fitModel(home_dir, data, weights, exposure, exposure_time_pts, outcome, model, diff --git a/man/make_love_plot.Rd b/man/make_love_plot.Rd index d900c5ae..7f23671b 100644 --- a/man/make_love_plot.Rd +++ b/man/make_love_plot.Rd @@ -22,7 +22,7 @@ make_love_plot( ) } \arguments{ -\item{home_dir}{path to home directory} +\item{home_dir}{path to home directory (required if save.out = TRUE)} \item{folder}{folder path for saving} diff --git a/man/trimWeights.Rd b/man/trimWeights.Rd index a3ff8fce..637a9420 100644 --- a/man/trimWeights.Rd +++ b/man/trimWeights.Rd @@ -15,7 +15,7 @@ 
trimWeights( ) } \arguments{ -\item{home_dir}{path to home directory} +\item{home_dir}{path to home directory (required if save.out = TRUE)} \item{exposure}{name of exposure variable} From f5c8def6edde6ab620affa4974f1ca439c79099e Mon Sep 17 00:00:00 2001 From: "Isabella Stallworthy, PhD" Date: Thu, 28 Sep 2023 11:56:16 -0400 Subject: [PATCH 10/12] fixes --- R/assessBalance.R | 21 ++++++- R/calcBalStats.R | 116 +++++++++++++++++++++---------------- R/compareHistories.R | 2 +- R/createFormulas.R | 2 +- R/createWeights.R | 8 +-- R/eval_hist.R | 2 - R/fitModel.R | 10 ++-- R/trimWeights.R | 2 +- examplePipelineRevised.Rmd | 5 +- man/eval_hist.Rd | 3 - 10 files changed, 101 insertions(+), 70 deletions(-) diff --git a/R/assessBalance.R b/R/assessBalance.R index ab1925a9..db13d9ab 100644 --- a/R/assessBalance.R +++ b/R/assessBalance.R @@ -140,6 +140,11 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, !(is.list(data) && all(vapply(data, is.data.frame, logical(1L))))) { stop("Please provide either a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE) } + else if(is.list(data) && !is.data.frame(data)){ + if (sum(sapply(data, is.list)) != length(data)){ + stop("Please supply a list of data frames that have been imputed.", call. = FALSE) + } + } if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) @@ -165,12 +170,18 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (missing(formulas)){ stop("Please supply a list of balancing formulas.", call. = FALSE) } - else if(!inherits(formulas, "list")){ + else if(!is.list(formulas) | is.data.frame(formulas)){ stop("Please provide a list of formulas for each exposure time point", call. = FALSE) } else if(length(formulas) != length(exposure_time_pts)){ stop("Please provide a list of formulas for each exposure time point", call. = FALSE) } + else if(is.list(formulas) && !is.data.frame(formulas)){ + if (sum(lapply(formulas, function(x) { + inherits(x, "formula")})) != length(formulas)){ + stop("Please supply a list of formulas for each exposure time point.", call. = FALSE) + } + } if (missing(type)){ stop("Please supply a 'weighted', 'prebalance' type", call. = FALSE) @@ -188,9 +199,15 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("The 'weighted' mode of this function requires weights be supplied in the form of output from createWeights.", call. = FALSE) } - if (!is.null(weights) && !inherits(weights, "list")){ + if (!is.null(weights) && (!is.list(weights) || is.data.frame(weights))){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } + else if(is.lis(weights) && !is.data.frame(weights)){ + if (sum(lapply(weights, function(x) { + inherits(x, "weightitMSM")})) != length(weights)){ + stop("Please supply a list of weights output from the createWeights function.", call. 
= FALSE) + } + } if(!is.numeric(balance_thresh)){ stop("Please provide one or two balance thresholds as numbers from 0-1.") diff --git a/R/calcBalStats.R b/R/calcBalStats.R index 3dcce37c..85052a76 100644 --- a/R/calcBalStats.R +++ b/R/calcBalStats.R @@ -78,7 +78,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_pts, outcome, balance_thresh, k = 0, weights = NULL, imp_conf = NULL, verbose = TRUE, save.out = TRUE){ - if(!inherits(formulas, "list")){ + if(!is.list(formulas) | is.data.frame(formulas)){ stop("Please provide a list of formulas for each exposure time point", call. = FALSE) } if (!is.null(weights) && !inherits(weights, "weightitMSM")){ @@ -218,10 +218,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (exp == 0) { # low levels/absent if (exposure_type == "continuous") { data$flag <- ifelse(data[, exps_time[t]] <= median(data[, paste0(exposure, ".", exposure_time_pt)]) - && data$flag == flag, t, NA) # finding those w/ vals <= median exp @ time pt & flagged at prev t's + & data$flag == flag, t, NA) # finding those w/ vals <= median exp @ time pt & flagged at prev t's } else { # for binary exp - data$flag <- ifelse(data[, exps_time[t]] == 0 && data$flag == flag, t , NA) # if exposure is absent & flagged at prev t's + data$flag <- ifelse(data[, exps_time[t]] == 0 & data$flag == flag, t , NA) # if exposure is absent & flagged at prev t's } } @@ -229,10 +229,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ if (exp == 1) { # hi levels/present if (exposure_type == "continuous") { data$flag <- ifelse(data[, exps_time[t]] > median(data[, paste0(exposure, ".", exposure_time_pt)]) - && data$flag == flag, t, NA) # finding those w/ vals > median exp @ time pt & flagged at prev t's + & data$flag == flag, t, NA) # finding those w/ vals > median exp @ time pt & flagged at prev t's } else { # binary exp - data$flag <- ifelse(data[, exps_time[t]] == 1 && data$flag == flag, t , NA) # if exposure is present & flagged at prev t's + data$flag <- ifelse(data[, exps_time[t]] == 1 & data$flag == flag, t , NA) # if exposure is present & flagged at prev t's } } @@ -258,8 +258,8 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ # Removing any histories that only have 1 or 0 person contributing (cannot calc bal stats) - if (sum(prop_sum$n == 1) > 0 | sum(prop_sum$n == 0) > 0) { - ommitted_histories <- as.character(as.data.frame(prop_sum)[prop_sum$n == 1, 1]) + if (sum(prop_sum$n == 1) > 0 || sum(prop_sum$n == 0) > 0) { + ommitted_histories <- as.character(as.data.frame(prop_sum)[prop_sum$n == 1 | prop_sum$n == 0, 1]) if (data_type == "imputed"){ # cat(paste0("USER ALERT: the following history/histories, ", ommitted_histories, @@ -278,7 +278,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ omitted_histories, exposure, exposure_time_pt)) } - temp <- temp[!temp$history %in% ommitted_histories, , drop = FALSE] + temp <- temp[!temp$history %in% ommitted_histories, ] } #ends hist exc # Unweighted pre-balance checking @@ -325,8 +325,10 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ temp2 <- temp[temp$history == i, , drop = FALSE ] - cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference - subset = temp2$history[temp2$history == i] == i) # subsetting by that history + cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, 
".", exposure_time_pt)], std = FALSE) # finding mean difference + + # cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference + # subset = temp2$history[temp2$history == i] == i) # subsetting by that history }) # getting weighted mean across histories, weighting by proportion of those w/ that same history @@ -372,7 +374,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ temp2 <- temp[temp$history == i, , drop = FALSE ] cobalt::col_w_cov(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding covariance - subset = temp2$history[temp2$history == i] == i, # subsetting by that history + # subset = temp2$history[temp2$history == i] == i, # subsetting by that history weights = temp2[, "weights"]) # adding IPTW weights }) @@ -413,7 +415,7 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ temp2 <- temp[temp$history == i, , drop = FALSE ] cobalt::col_w_smd(temp2[, c(covars)], temp2[, paste0(exposure, ".", exposure_time_pt)], std = FALSE, # finding mean difference - subset = temp2$history[temp2$history == i] == i, # subsetting by that history + # subset = temp2$history[temp2$history == i] == i, # subsetting by that history weights = temp2[, "weights"]) # adding IPTW weights }) @@ -573,17 +575,20 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ imbalanced_covars <- sum(bal_summary_exp$imbalanced_n, na.rm = TRUE) total_covars <- sum(bal_summary_exp$n, na.rm = TRUE) - percentage_imbalanced <- round((imbalanced_covars / total_covars) * 100, 0) - - remaining_imbalanced_domains <- length(sapply(strsplit(all_bal_stats[all_bal_stats$balanced == 0, "covariate"], "\\."), - "[", 1)[!duplicated(sapply(strsplit(all_bal_stats[all_bal_stats$balanced == 0, "covariate"], - "\\."), "[", 1))]) total_domains <- length(tot_covars) - # remaining_avg_abs_corr <- round(mean(abs(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"]), na.rm = TRUE), 2) - remaining_avg_abs_corr <- round(median(abs(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"]), na.rm = TRUE), 2) - remaining_corr_range <- paste0(round(min(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"], na.rm = TRUE), 2), - "-", round(max(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"], na.rm = TRUE), 2)) + if (imbalanced_covars > 0){ + percentage_imbalanced <- round((imbalanced_covars / total_covars) * 100, 0) + + remaining_imbalanced_domains <- length(sapply(strsplit(all_bal_stats[all_bal_stats$balanced == 0, "covariate"], "\\."), + "[", 1)[!duplicated(sapply(strsplit(all_bal_stats[all_bal_stats$balanced == 0, "covariate"], + "\\."), "[", 1))]) + + # remaining_avg_abs_corr <- round(mean(abs(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"]), na.rm = TRUE), 2) + remaining_avg_abs_corr <- round(median(abs(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"]), na.rm = TRUE), 2) + remaining_corr_range <- paste0(round(min(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"], na.rm = TRUE), 2), + "-", round(max(all_bal_stats[all_bal_stats$balanced == 0, "std_bal_stats"], na.rm = TRUE), 2)) + } if (verbose){ @@ -596,41 +601,54 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ round(median(abs(all_bal_stats$std_bal_stats)), 2), round(min(all_bal_stats$std_bal_stats), 2), round(max(all_bal_stats$std_bal_stats), 2))) + cat("\n") - cat(sprintf("As shown below, %s out of %s ( 
%s%%) covariates across time points, corresponding to %sout of %s domains, + if (imbalanced_covars > 0){ + cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %s out of %s domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of %s (range= %s):\n", - imbalanced_covars, - total_covars, - percentage_imbalanced, - remaining_imbalanced_domains, - total_domains, - remaining_avg_abs_corr, - remaining_corr_range)) - - cat("\n") - cat(knitr::kable(bal_summary_exp, - caption = sprintf("Imbalanced Covariates for imputation %s using %s and %s formulas", - k, weights_method, form_name), - format = 'pipe'), sep = "\n") + imbalanced_covars, + total_covars, + percentage_imbalanced, + remaining_imbalanced_domains, + total_domains, + remaining_avg_abs_corr, + remaining_corr_range)) + + cat("\n") + cat(knitr::kable(bal_summary_exp, + caption = sprintf("Imbalanced Covariates for imputation %s using %s and %s formulas", + k, weights_method, form_name), + format = 'pipe'), sep = "\n") + } + else{ + cat(sprintf("No covariates remain imbalanced for imputation %s using %s and %s formulas. \n", + k, weights_method, form_name)) + } cat("\n") cat("\n") } else { - cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %sout of %s domains, + if (imbalanced_covars > 0){ + cat(sprintf("As shown below, %s out of %s ( %s%%) covariates across time points, corresponding to %s out of %s domains, remain imbalanced with a remaining median absolute value correlation/std mean difference of %s (range= %s):\n", - imbalanced_covars, - total_covars, - percentage_imbalanced, - remaining_imbalanced_domains, - total_domains, - remaining_avg_abs_corr, - remaining_corr_range)) - - cat("\n") - cat(knitr::kable(bal_summary_exp, - caption = sprintf("Imbalanced covariates using %s and %s formulas", weights_method, form_name), - format = 'pipe'), sep = "\n") + imbalanced_covars, + total_covars, + percentage_imbalanced, + remaining_imbalanced_domains, + total_domains, + remaining_avg_abs_corr, + remaining_corr_range)) + + cat("\n") + cat(knitr::kable(bal_summary_exp, + caption = sprintf("Imbalanced covariates using %s and %s formulas", weights_method, form_name), + format = 'pipe'), sep = "\n") + } + else{ + cat(sprintf("No covariates remain imbalanced using %s and %s formulas. \n", + weights_method, form_name)) + } cat("\n") cat("\n") } @@ -640,4 +658,4 @@ calcBalStats <- function(home_dir = NA, data, formulas, exposure, exposure_time_ all_bal_stats } -f + diff --git a/R/compareHistories.R b/R/compareHistories.R index 1257f6ab..b0b2cdbb 100644 --- a/R/compareHistories.R +++ b/R/compareHistories.R @@ -138,7 +138,7 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod if (missing(model)){ stop("Please supply a list of model output", call. = FALSE) } - else if(!inherits(model, "list")){ + else if(!is.list(model) || is.data.frame(model)){ stop("Please provide a list of model output from the fitModel function.", call.
= FALSE) } diff --git a/R/createFormulas.R b/R/createFormulas.R index 2befcc7e..33e45648 100644 --- a/R/createFormulas.R +++ b/R/createFormulas.R @@ -174,7 +174,7 @@ createFormulas <- function(home_dir, exposure, exposure_time_pts, outcome, type, all_covars <- c(tv_confounders, ti_confounders) if (!is.null(custom)){ - if (length(custom) != length(exposure_time_pts) || !inherits(custom, "list")){ + if (length(custom) != length(exposure_time_pts) || !is.list(custom) || is.data.frame(custom)){ stop("If you wish to supply custom formulas, please provide a list of formulas for each exposure time point.", call. = FALSE) } diff --git a/R/createWeights.R b/R/createWeights.R index 063d3c51..01528f61 100644 --- a/R/createWeights.R +++ b/R/createWeights.R @@ -79,7 +79,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } - else if (!mice::is.mids(data) && !is.data.frame(data) && !inherits(data, "list")) { + else if (!inherits(data, "mids") && !is.data.frame(data) && !is.list(data)) { stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", call. = FALSE) } @@ -101,7 +101,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = if (missing(formulas)){ stop("Please supply a list of balancing formulas.", call. = FALSE) } - else if(!inherits(formulas, "list")){ + else if(!is.list(formulas) | is.data.frame(formulas)){ stop("Please provide a list of formulas for each exposure time point", call. = FALSE) } @@ -199,7 +199,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } - if(mice::is.mids(data)){ + if(inherits(data, "mids")){ # Cycling through imputed datasets weights <- lapply(seq_len(data$m), function(i) { d <- as.data.frame(mice::complete(data, i)) @@ -271,7 +271,7 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = } } - else if(inherits(data, "list")){ + else if(is.list(data) && !is.data.frame(data)){ # Cycling through list of imputed datasets weights <- lapply(seq_len(length(data)), function(i) { d <- data[[i]] diff --git a/R/eval_hist.R b/R/eval_hist.R index 4db8e12d..afe047e1 100644 --- a/R/eval_hist.R +++ b/R/eval_hist.R @@ -6,8 +6,6 @@ #' @param data data in wide format as: a data frame, list of imputed #' data frames, or mids object #' @param exposure name of exposure variable -#' @param tv_confounders list of time-varying confounders with ".timepoint" -#' suffix #' @param epochs (optional) data frame of exposure epoch labels and values #' @param time_pts list of integers at which weights will be #' created/assessed that correspond to time points when exposure was measured diff --git a/R/fitModel.R b/R/fitModel.R index dcdeb69c..f5d98b2b 100644 --- a/R/fitModel.R +++ b/R/fitModel.R @@ -122,7 +122,7 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco stop("Please supply data as either a dataframe with no missing data or imputed data in the form of a mids object or path to folder with imputed csv datasets.", call. = FALSE) } - else if (!mice::is.mids(data) && !is.data.frame(data) && !inherits(data, "list")) { + else if (!inherits(data, "mids") && !is.data.frame(data) && !is.list(data)) { stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", call. 
= FALSE) } @@ -143,7 +143,7 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco if (missing(weights)){ stop("Please supply a list of IPTW weights.", call. = FALSE) } - else if (!inherits(weights, "list")){ + else if (!is.list(weights) || is.data.frame(weights)){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } @@ -251,7 +251,7 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco # fam <- family(link = l) # } - if (mice::is.mids(data)){ #imputed dataset + if (inherits(data, "mids")){ #imputed dataset fits <- lapply(seq_len(data$m), function(y) { @@ -285,7 +285,7 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco } - else if (inherits(data, "list")){ #imputed dataset + else if (is.list(data) && !is.data.frame(data)){ #imputed dataset fits <- lapply(seq_len(length(data)), function(y) { d <- data[[y]] @@ -348,7 +348,7 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco - if (mice::is.mids(data) || inherits(data, "list")){ + if (inherits(data, "mids") || is.list(data)){ names(fits) <- seq_len(length(fits)) diff --git a/R/trimWeights.R b/R/trimWeights.R index 1b78a0c4..eae937a7 100644 --- a/R/trimWeights.R +++ b/R/trimWeights.R @@ -81,7 +81,7 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v if (missing(weights)){ stop("Please supply a list of IPTW weights to trim.", call. = FALSE) } - else if (!inherits(weights, "list")){ + else if (!is.list(weights) || is.data.frame(weights)){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 27b9f3b2..0eae30b4 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -56,7 +56,7 @@ library(devMSMsHelpers) #prior to re-installing using the code above. Sorry, this is annoying! 
#conducting package checks & tests from documentation --just for IS to run; comment out when testing the workflow -# devtools::check() + devtools::check() ``` @@ -280,7 +280,8 @@ inspectData(data, home_dir, exposure, exposure_time_pts, outcome, tv_confounders #or minimum inputs -inspectData(data, home_dir, exposure, exposure_time_pts, outcome, tv_confounders, ti_confounders) +inspectData(data, home_dir, exposure, exposure_time_pts, outcome, ti_confounders, + tv_confounders = tv_confounders) ``` diff --git a/man/eval_hist.Rd b/man/eval_hist.Rd index c892b7dc..8169a509 100644 --- a/man/eval_hist.Rd +++ b/man/eval_hist.Rd @@ -39,9 +39,6 @@ reference, required if reference is supplied} \item{verbose}{(optional) TRUE or FALSE indicator for user output (default is TRUE)} - -\item{tv_confounders}{list of time-varying confounders with ".timepoint" -suffix} } \value{ none From a8f71bbc5ed424e8c9d262736c509f5020a770c2 Mon Sep 17 00:00:00 2001 From: "Isabella Stallworthy, PhD" Date: Thu, 28 Sep 2023 14:08:13 -0400 Subject: [PATCH 11/12] data type fixes --- R/assessBalance.R | 6 +++--- R/compareHistories.R | 4 ++++ R/createWeights.R | 5 +++++ R/eval_hist.R | 5 +++-- R/fitModel.R | 13 ++++++++++++- R/trimWeights.R | 6 ++++++ examplePipelineRevised.Rmd | 5 +++-- 7 files changed, 36 insertions(+), 8 deletions(-) diff --git a/R/assessBalance.R b/R/assessBalance.R index db13d9ab..b00b1aa8 100644 --- a/R/assessBalance.R +++ b/R/assessBalance.R @@ -141,7 +141,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("Please provide either a 'mids' object, a data frame, or a list of imputed csv files in the 'data' field.", call. = FALSE) } else if(is.list(data) && !is.data.frame(data)){ - if (sum(sapply(data, is.list)) != length(data)){ + if (sum(sapply(data, is.data.frame)) != length(data)){ stop("Please supply a list of data frames that have been imputed.", call. = FALSE) } } @@ -177,7 +177,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("Please provide a list of formulas for each exposure time point", call. = FALSE) } else if(is.list(formulas) && !is.data.frame(formulas)){ - if (sum(lapply(formulas, function(x) { + if (sum(sapply(formulas, function(x) { inherits(x, "formula")})) != length(formulas)){ stop("Please supply a list of formulas for each exposure time point.", call. = FALSE) } @@ -203,7 +203,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } else if(is.lis(weights) && !is.data.frame(weights)){ - if (sum(lapply(weights, function(x) { + if (sum(sapply(weights, function(x) { inherits(x, "weightitMSM")})) != length(weights)){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } diff --git a/R/compareHistories.R b/R/compareHistories.R index b0b2cdbb..1317aac6 100644 --- a/R/compareHistories.R +++ b/R/compareHistories.R @@ -141,6 +141,10 @@ compareHistories <- function(home_dir, exposure, exposure_time_pts, outcome, mod else if(!is.list(model) || is.data.frame(model)){ stop("Please provide a list of model output from the fitModel function.", call. = FALSE) } + else if (sum(sapply(model, function(x) { + inherits(x, "svyglm")})) != length(model)){ + stop("Please supply a model as a list from the fitModel function.", call. = FALSE) + } if(!is.logical(verbose)){ stop("Please set verbose to either TRUE or FALSE.", call.
= FALSE) diff --git a/R/createWeights.R b/R/createWeights.R index 01528f61..3e693e42 100644 --- a/R/createWeights.R +++ b/R/createWeights.R @@ -83,6 +83,11 @@ createWeights <- function(home_dir, data, exposure, outcome, formulas, method = stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", call. = FALSE) } + else if(is.list(data) && !is.data.frame(data)){ + if (sum(sapply(data, is.data.frame)) != length(data)){ + stop("Please supply a list of data frames that have been imputed.", call. = FALSE) + } + } if (missing(exposure)){ stop("Please supply a single exposure.", call. = FALSE) diff --git a/R/eval_hist.R b/R/eval_hist.R index afe047e1..f6fed511 100644 --- a/R/eval_hist.R +++ b/R/eval_hist.R @@ -200,6 +200,7 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, new$history <- unlist(new$history) his_summ <- aggregate( ID ~ history, data = new, FUN = function(x) n = length(x)) + colnames(his_summ) <- c("history", "n") if( !is.na(ref) && !is.null(comps)){ his_sum <- his_summ[his_summ$history %in% c(ref, comps), ] @@ -216,7 +217,7 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, # hi_lo_cut[2] * 100, "th and ", hi_lo_cut[1] * 100, "th percentile values for low and high levels of exposure ", exposure, # ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") - cat(sprintf("USER ALERT: Out of the total of %s individuals in the sample, below is the distribution of the %s (%s%%) + cat(sprintf("USER ALERT: Out of the total of %s individuals in the sample, below is the distribution of the %s (%s%%) individuals that fall into %s out of the %s the total user-defined exposure histories created from %sth and %sth percentile values for low and high levels of exposure %s, respectively, across %s. \n", nrow(data_wide), @@ -236,7 +237,7 @@ eval_hist <- function(data, exposure, epochs = NULL, time_pts, hi_lo_cut = NULL, # " the total user-defined exposure histories created from median split values for low and high levels of exposure ", exposure, # ", respectively, across ", paste(epochs$epochs, collapse = ", ")), "\n") - cat(sprintf("USER ALERT: Out of the total of %s individuals in the sample, below is the distribution of the %s (%s%%) that fall into %s + cat(sprintf("USER ALERT: Out of the total of %s individuals in the sample, below is the distribution of the %s (%s%%) individuals that fall into %s out of the %s total user-defined exposure histories created from median split values for low and high levels of exposure %s, respectively, across %s. \n", nrow(data_wide), diff --git a/R/fitModel.R b/R/fitModel.R index f5d98b2b..f506178a 100644 --- a/R/fitModel.R +++ b/R/fitModel.R @@ -125,6 +125,11 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco else if (!inherits(data, "mids") && !is.data.frame(data) && !is.list(data)) { stop("Please provide either a 'mids' object, a data frame, or a list of imputed data frames in the 'data' field.", call. = FALSE) } + else if(is.list(data) && !is.data.frame(data)){ + if (sum(sapply(data, is.data.frame)) != length(data)){ + stop("Please supply a list of data frames that have been imputed.", call. = FALSE) + } + } if (missing(exposure)){ stop("Please supply a single exposure.", call. 
= FALSE) @@ -146,6 +151,12 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco else if (!is.list(weights) || is.data.frame(weights)){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } + else if(is.list(weights) && !is.data.frame(weights)){ + if (sum(sapply(weights, function(x) { + inherits(x, "weightitMSM")})) != length(weights)){ + stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + } + } if (missing(exposure_time_pts)){ stop("Please supply the exposure time points at which you wish to create weights.", call. = FALSE) @@ -348,7 +359,7 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco - if (inherits(data, "mids") || is.list(data)){ + if (inherits(data, "mids") || (is.list(data)) && !is.data.frame(data)){ names(fits) <- seq_len(length(fits)) diff --git a/R/trimWeights.R b/R/trimWeights.R index eae937a7..a3470c17 100644 --- a/R/trimWeights.R +++ b/R/trimWeights.R @@ -84,6 +84,12 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v else if (!is.list(weights) || is.data.frame(weights)){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } + else if(is.lis(weights) && !is.data.frame(weights)){ + if (sum(sapply(weights, function(x) { + inherits(x, "weightitMSM")})) != length(weights)){ + stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) + } + } if (missing(outcome)){ stop("Please supply a single outcome.", call. = FALSE) diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index 0eae30b4..0f741ab4 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -162,6 +162,7 @@ data_wide <- data_wide[,colSums(is.na(data_wide)) < nrow(data_wide)] #removing vars w/no variability; specific to FLP data +library(dplyr) data_wide <- data_wide %>% dplyr::select(-c(RHasSO.6, RHasSO.15 , RHasSO.24)) @@ -280,8 +281,8 @@ inspectData(data, home_dir, exposure, exposure_time_pts, outcome, tv_confounders #or minimum inputs -inspectData(data, home_dir, exposure, exposure_time_pts, outcome, ti_confounders, - tv_confounders = tv_confounders) +inspectData(data, home_dir, exposure, exposure_time_pts, outcome, + ti_confounders, tv_confounders = tv_confounders) ``` From 76857e2073ee49c259cd088275ba1168350559b2 Mon Sep 17 00:00:00 2001 From: "Isabella Stallworthy, PhD" Date: Thu, 28 Sep 2023 14:13:10 -0400 Subject: [PATCH 12/12] typos --- R/assessBalance.R | 2 +- R/trimWeights.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/assessBalance.R b/R/assessBalance.R index b00b1aa8..9da10178 100644 --- a/R/assessBalance.R +++ b/R/assessBalance.R @@ -202,7 +202,7 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome, if (!is.null(weights) && (!is.list(weights) || is.data.frame(weights))){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } - else if(is.lis(weights) && !is.data.frame(weights)){ + else if(is.list(weights) && !is.data.frame(weights)){ if (sum(sapply(weights, function(x) { inherits(x, "weightitMSM")})) != length(weights)){ stop("Please supply a list of weights output from the createWeights function.", call. 
= FALSE) diff --git a/R/trimWeights.R b/R/trimWeights.R index a3470c17..d50d8772 100644 --- a/R/trimWeights.R +++ b/R/trimWeights.R @@ -84,7 +84,7 @@ trimWeights <- function(home_dir, exposure, outcome, weights, quantile = 0.95, v else if (!is.list(weights) || is.data.frame(weights)){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE) } - else if(is.lis(weights) && !is.data.frame(weights)){ + else if(is.list(weights) && !is.data.frame(weights)){ if (sum(sapply(weights, function(x) { inherits(x, "weightitMSM")})) != length(weights)){ stop("Please supply a list of weights output from the createWeights function.", call. = FALSE)