Skip to content

Commit

Permalink
Merge pull request #73 from istallworthy/run-through
Browse files Browse the repository at this point in the history
Meriah run through
  • Loading branch information
istallworthy authored Sep 17, 2023
2 parents c79f41d + 4e29b13 commit 90c3a53
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 107 deletions.
3 changes: 3 additions & 0 deletions R/assessBalance.R
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,9 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
}

else if (is.data.frame(data)){
if(!inherits(data, "data.frame")){
stop("Please provide data as a data frame.", call. = FALSE)
}
if (sum(duplicated(data$"ID")) > 0){
stop("Please provide wide dataset with a single row per ID.", call. = FALSE)
}
Expand Down
56 changes: 38 additions & 18 deletions R/createWeights.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#' @importFrom WeightIt weightitMSM
#' @seealso {[WeightIt::WeightItMSM()], <url1>}
#' @param home_dir path to home directory
#' @param data data in wide format as: a data frame, list of imputed
#' data frames, or mids object
#' @param data data in wide format as: a data frame, list of imputed data
#' frames, or mids object
#' @param exposure name of exposure variable
#' @param outcome name of outcome variable with ".timepoint" suffix
#' @param tv_confounders list of time-varying confounders with ".timepoint"
Expand All @@ -21,11 +21,14 @@
#' createFormulas()
#' @param method (optional) character string of WeightItMSM() balancing method
#' abbreviation (default is Covariate Balancing Propensity Score "cbps")
#' @param SL.library required for superLearner weighting method; see
#' SuperLearner::listWrappers() for options
#' @param read_in_from_file (optional) "yes" or "no" indicator to read in
#' weights that have been previously run and saved locally (default is "no")
#' @param verbose (optional) TRUE or FALSE indicator for user output (default is
#' TRUE)
#' @param save.out (optional) TRUE or FALSE indicator to save output and intermediary output locally (default is TRUE)
#' @param save.out (optional) TRUE or FALSE indicator to save output and
#' intermediary output locally (default is TRUE)
#' @return list of IPTW balancing weights
#' @export
#' @examples
Expand Down Expand Up @@ -64,7 +67,8 @@
#' save.out = FALSE)


createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, formulas, method = "cbps", read_in_from_file = "no", verbose = TRUE, save.out = TRUE) {
createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, formulas, method = "cbps",
SL.library = NA, read_in_from_file = "no", verbose = TRUE, save.out = TRUE) {

if (save.out) {
if (missing(home_dir)) {
Expand Down Expand Up @@ -133,13 +137,15 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for

if (read_in_from_file == "yes") {
tryCatch({
weights <- readRDS(paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_", weights_method, "_fit.rds"))
weights <- readRDS(paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_",
weights_method, "_fit.rds"))

if (verbose){
message("Reading in balancing weights from the local folder.")
}
}, error = function(x) {
stop("These weights have not previously been saved locally. Please re-run with read_in_from_file='no'", call. = FALSE)
stop("These weights have not previously been saved locally. Please re-run with read_in_from_file='no'",
call. = FALSE)
})
weights
}
Expand All @@ -151,15 +157,29 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for
form <- unname(form)

# Helper function to calculate weights
calculate_weights <- function(data, form, weights_method) {
fit <- weightitMSM(form,
data = data,
method = weights_method,
stabilize = TRUE,
density = "dt_2", #do we want this?
use.kernel = TRUE,
include.obj = TRUE,
over = FALSE)
calculate_weights <- function(data, form, weights_method, SL.library) {

if(weights_method == "super"){
fit <- weightitMSM(form,
data = data,
method = weights_method,
stabilize = TRUE,
density = "dt_2", #do we want this?
use.kernel = TRUE,
include.obj = TRUE,
SL.library = SL.library,
over = FALSE)
}
else{
fit <- weightitMSM(form,
data = data,
method = weights_method,
stabilize = TRUE,
density = "dt_2", #do we want this?
use.kernel = TRUE,
include.obj = TRUE,
over = FALSE)
}
fit
}

Expand All @@ -177,7 +197,7 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for
stop("Please provide wide imputed datasets with a single row per ID.", call. = FALSE)
}

fit <- calculate_weights(d, form, weights_method)
fit <- calculate_weights(d, form, weights_method, SL.library)

d$weights <- fit$weights

Expand Down Expand Up @@ -230,7 +250,7 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for
stop("Please provide wide imputed datasets with a single row per ID.", call. = FALSE)
}

fit <- calculate_weights(d, form, weights_method)
fit <- calculate_weights(d, form, weights_method, SL.library)

d$weights <- fit$weights

Expand Down Expand Up @@ -285,7 +305,7 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for

# Creating weights
weights <- lapply(1, function(i) {
calculate_weights(data, form, weights_method)
calculate_weights(data, form, weights_method, SL.library)
})

data$weights <- weights[[1]]$weights
Expand Down
16 changes: 11 additions & 5 deletions R/fitModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
#' @seealso {[survey::svyglm()] for more on family/link specifications, <url1>}
#' @seealso {[createWeights()], <url1>}
#' @param home_dir path to home directory
#' @param data data in wide format as: a data frame, list of imputed
#' data frames, or mids object
#' @param data data in wide format as: a data frame, list of imputed data
#' frames, or mids object
#' @param weights list of IPTW weights output from createWeights()
#' @param exposure name of exposure variable
#' @param exposure_time_pts list of integers at which weights will be
Expand All @@ -32,7 +32,8 @@
#' @param epochs (optional) data frame of exposure epoch labels and values
#' @param verbose (optional) TRUE or FALSE indicator for user output (default is
#' TRUE)
#' @param save.out (optional) TRUE or FALSE indicator to save output and intermediary output locally (default is TRUE)
#' @param save.out (optional) TRUE or FALSE indicator to save output and
#' intermediary output locally (default is TRUE)
#' @return list of svyglm model output
#' @export
#' @examples
Expand Down Expand Up @@ -111,7 +112,9 @@
#' covariates = "C",
#' save.out = FALSE)

fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model, family = gaussian, link = "identity", int_order = NA, covariates = NULL, epochs = NULL, verbose = TRUE, save.out = TRUE) {
fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model,
family = gaussian, link = "identity", int_order = NA, covariates = NULL, epochs = NULL,
verbose = TRUE, save.out = TRUE) {

if (save.out) {
if (missing(home_dir)) {
Expand Down Expand Up @@ -333,8 +336,11 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco
}

if (save.out){
require(officer) #is there another way to do this? required for writing to word
require(flextable) # " "
suppressWarnings(jtools::export_summs(fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"),
file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx"))))
file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model,
"_table_mod_ev.docx"))))
}

}
Expand Down
125 changes: 68 additions & 57 deletions R/inspectData.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,58 +146,6 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv
exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])],
"numeric"), "continuous", "binary")

# Data type
cat("The following variables are designated as numeric:", "\n")
print(paste(colnames(data)[sapply(data, class) == "numeric"], sep = ",", collapse = ", "))

cat("The following variables are designated as factors:", "\n")
print(paste(colnames(data)[sapply(data, class) == "factor"], sep = ",", collapse = ", "))

oth <- data.frame(variable = names(sapply(data, class)) [!sapply(data, class) %in% c("numeric", "factor")],
type = sapply(data, class) [!sapply(data, class) %in% c("numeric", "factor")])
cat(knitr::kable(oth, caption = "Other variable types",
format = 'pipe'), sep = "\n")


# Exposure summary
exposure_summary <- data %>%
dplyr:: select(colnames(data)[grepl(exposure, colnames(data))])
exposure_summary <- psych::describe(exposure_summary, fast = TRUE)


if (save.out){
knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"), format = 'html') %>%
kableExtra::kable_styling() %>%
kableExtra::save_kable(file = file.path(home_dir, paste0("/", exposure, "_exposure_info.html")))
if(verbose){
cat(knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"), format = 'pipe'), sep = "\n")
cat(paste0(exposure, " exposure descriptive statistics have now been saved in the home directory"), "\n")
cat("\n")
}
}


# Outcome summary
outcome_summary <- data %>%
dplyr:: select(contains(sapply(strsplit(outcome, "\\."), "[", 1)))
outcome_summary <- psych::describe(outcome_summary, fast = TRUE)

if(save.out){
knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ",
sapply(strsplit(outcome, "\\."), "[", 1), " Information"), format = 'html') %>%
kableExtra::kable_styling() %>%
kableExtra::save_kable(file = file.path(home_dir, paste0("/", sapply(strsplit(outcome, "\\."), "[", 1), "_outcome_info.html")))

if (verbose){
cat(knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ",
sapply(strsplit(outcome, "\\."), "[", 1), " Information"),
format = 'pipe'), sep = "\n")

cat(paste0(sapply(strsplit(outcome, "\\."), "[", 1), " outcome descriptive statistics have now been saved in the home directory"), "\n")
}
}



# Confounder summary
potential_covariates <- colnames(data)[!(colnames(data) %in% c(ID))]
Expand Down Expand Up @@ -238,7 +186,7 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv

for (l in seq_len(nrow(test))) {
z = c(sapply(strsplit(all_potential_covariates[grepl(paste0(".", rownames(test)[l]),
all_potential_covariates)], "\\."), "[", 1), time_invar_covars)
all_potential_covariates)], "\\."), "[", 1), time_invar_covars)
z = z[!duplicated(z)]
test[l, z ] <- 1
}
Expand All @@ -255,16 +203,37 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv

if(verbose){
print(glue::glue("See the home directory for a table and matrix displaying all covariates confounders considered at each exposure time point for {exposure} and {outcome}."), "\n")
cat("\n")

#-2 to exclude ID and WAVE
print(glue::glue("USER ALERT: Below are the {as.character(length(all_potential_covariates) - 2)} variables spanning {unique_vars - 2} unique domains that will be treated as confounding variables for the relation between {exposure} and {outcome}."), "\n",
"Please inspect this list carefully. It should include all time-varying covariates, time invariant covariates, as well as lagged levels of exposure and outcome variables if they were collected at time points earlier than the outcome time point.", "\n")
cat("\n")
print(glue::glue("USER ALERT: Below are the {as.character(length(all_potential_covariates) - 2)} variables spanning {unique_vars - 2} unique domains that will be treated as confounding variables for the relation between {exposure} and {outcome}."),
"Please inspect this list carefully. It should include all time-varying covariates, time invariant covariates, as well as lagged levels of exposure and outcome variables if they were collected at time points earlier than the outcome time point.", "\n")
print(all_potential_covariates[!(all_potential_covariates %in% c(ID))])
}
}


# Data type
cat("\n")
cat("The following variables are designated as numeric:", "\n")
print(paste(colnames(data)[sapply(data, class) == "numeric"], sep = ",", collapse = ", "))
cat("\n")

cat("The following variables are designated as factors:", "\n")
print(paste(colnames(data)[sapply(data, class) == "factor"], sep = ",", collapse = ", "))
cat("\n")

oth <- data.frame(variable = names(sapply(data, class)) [!sapply(data, class) %in% c("numeric", "factor")],
type = sapply(data, class) [!sapply(data, class) %in% c("numeric", "factor")])
if(nrow(oth) > 0 ){
cat(knitr::kable(oth, caption = "Other variable types",
format = 'pipe'), sep = "\n")
cat("\n")
}

if(sum(sapply(data, is.character)) > 0){
warning(paste0(paste(names(data)[sapply(data, is.character)], sep = ", ", collapse = ", "),
" are of class character.", " The package cannot accept character variables."), call. = FALSE)
}
#covariate correlations
covariates_to_include <- all_potential_covariates

Expand Down Expand Up @@ -298,6 +267,26 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv
}


# Exposure summary
exposure_summary <- data %>%
dplyr:: select(colnames(data)[grepl(exposure, colnames(data))])
exposure_summary <- psych::describe(exposure_summary, fast = TRUE)


if (save.out){
knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"),
format = 'html') %>%
kableExtra::kable_styling() %>%
kableExtra::save_kable(file = file.path(home_dir, paste0("/", exposure, "_exposure_info.html")))
if(verbose){
cat(knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"),
format = 'pipe'), sep = "\n")
cat(paste0(exposure, " exposure descriptive statistics have now been saved in the home directory"), "\n")
cat("\n")
}
}


# Exposure history summary
if( is.null(epochs)){ #making epochs time pts if not specified by user
epochs <- data.frame(epochs = as.character(time_pts),
Expand All @@ -315,4 +304,26 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv

eval_hist(data = data2, exposure, tv_confounders, epochs,
exposure_time_pts, hi_lo_cut, ref = reference, comps = comparison, verbose)



# Outcome summary
outcome_summary <- data[, grepl(sapply(strsplit(outcome, "\\."),
"[", 1), colnames(data))]
outcome_summary <- psych::describe(outcome_summary, fast = TRUE)

if(save.out){
knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ",
sapply(strsplit(outcome, "\\."), "[", 1), " Information"), format = 'html') %>%
kableExtra::kable_styling() %>%
kableExtra::save_kable(file = file.path(home_dir, paste0("/", sapply(strsplit(outcome, "\\."), "[", 1), "_outcome_info.html")))

if (verbose){
cat(knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ",
sapply(strsplit(outcome, "\\."), "[", 1), " Information"),
format = 'pipe'), sep = "\n")

cat(paste0(sapply(strsplit(outcome, "\\."), "[", 1), " outcome descriptive statistics have now been saved in the home directory"), "\n")
}
}
}
Loading

0 comments on commit 90c3a53

Please sign in to comment.