Merge pull request #73 from istallworthy/run-through

Meriah run through
istallworthy · Sep 17, 2023 · 90c3a53 · 90c3a53
2 parents c79f41d + 4e29b13
commit 90c3a53
Show file tree

Hide file tree

Showing 6 changed files with 177 additions and 107 deletions.
diff --git a/R/assessBalance.R b/R/assessBalance.R
@@ -337,6 +337,9 @@ assessBalance <- function(home_dir, data, exposure, exposure_time_pts, outcome,
     }
 
     else if (is.data.frame(data)){
+      if(!inherits(data, "data.frame")){
+        stop("Please provide data as a data frame.", call. = FALSE)
+      }
       if (sum(duplicated(data$"ID")) > 0){
         stop("Please provide wide dataset with a single row per ID.", call. = FALSE)
       }

diff --git a/R/createWeights.R b/R/createWeights.R
@@ -11,8 +11,8 @@
 #' @importFrom WeightIt weightitMSM
 #' @seealso {[WeightIt::WeightItMSM()], <url1>}
 #' @param home_dir path to home directory
-#' @param data data in wide format as: a data frame, list of imputed
-#'   data frames, or mids object
+#' @param data data in wide format as: a data frame, list of imputed data
+#'   frames, or mids object
 #' @param exposure name of exposure variable
 #' @param outcome name of outcome variable with ".timepoint" suffix
 #' @param tv_confounders list of time-varying confounders with ".timepoint"
@@ -21,11 +21,14 @@
 #'   createFormulas()
 #' @param method (optional) character string of WeightItMSM() balancing method
 #'   abbreviation (default is Covariate Balancing Propensity Score "cbps")
+#' @param SL.library required for superLearner weighting method; see
+#'   SuperLearner::listWrappers() for options
 #' @param read_in_from_file (optional) "yes" or "no" indicator to read in
 #'   weights that have been previously run and saved locally (default is "no")
 #' @param verbose (optional) TRUE or FALSE indicator for user output (default is
 #'   TRUE)
-#' @param save.out (optional) TRUE or FALSE indicator to save output and intermediary output locally (default is TRUE)
+#' @param save.out (optional) TRUE or FALSE indicator to save output and
+#'   intermediary output locally (default is TRUE)
 #' @return list of IPTW balancing weights
 #' @export
 #' @examples
@@ -64,7 +67,8 @@
 #'                    save.out = FALSE)
 
 
-createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, formulas, method = "cbps", read_in_from_file = "no", verbose = TRUE, save.out = TRUE) {
+createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, formulas, method = "cbps",
+                          SL.library = NA, read_in_from_file = "no", verbose = TRUE, save.out = TRUE) {
 
   if (save.out) {
     if (missing(home_dir)) {
@@ -133,13 +137,15 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for
 
   if (read_in_from_file == "yes") {
     tryCatch({
-      weights <- readRDS(paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_", weights_method, "_fit.rds"))
+      weights <- readRDS(paste0(home_dir, "/weights/", exposure, "-", outcome, "_", form_name, "_",
+                                weights_method, "_fit.rds"))
 
       if (verbose){
         message("Reading in balancing weights from the local folder.")
       }
     }, error = function(x) {
-      stop("These weights have not previously been saved locally. Please re-run with read_in_from_file='no'", call. = FALSE)
+      stop("These weights have not previously been saved locally. Please re-run with read_in_from_file='no'",
+           call. = FALSE)
     })
     weights
   }
@@ -151,15 +157,29 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for
     form <- unname(form)
 
     # Helper function to calculate weights
-    calculate_weights <- function(data, form, weights_method) {
-      fit <- weightitMSM(form,
-                         data = data,
-                         method = weights_method,
-                         stabilize = TRUE,
-                         density = "dt_2", #do we want this?
-                         use.kernel = TRUE,
-                         include.obj = TRUE,
-                         over = FALSE)
+    calculate_weights <- function(data, form, weights_method, SL.library) {
+
+      if(weights_method == "super"){
+        fit <- weightitMSM(form,
+                           data = data,
+                           method = weights_method,
+                           stabilize = TRUE,
+                           density = "dt_2", #do we want this?
+                           use.kernel = TRUE,
+                           include.obj = TRUE,
+                           SL.library = SL.library,
+                           over = FALSE)
+      }
+      else{
+        fit <- weightitMSM(form,
+                           data = data,
+                           method = weights_method,
+                           stabilize = TRUE,
+                           density = "dt_2", #do we want this?
+                           use.kernel = TRUE,
+                           include.obj = TRUE,
+                           over = FALSE)
+      }
       fit
     }
 
@@ -177,7 +197,7 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for
           stop("Please provide wide imputed datasets with a single row per ID.", call. = FALSE)
         }
 
-        fit <- calculate_weights(d, form, weights_method)
+        fit <- calculate_weights(d, form, weights_method, SL.library)
 
         d$weights <- fit$weights
 
@@ -230,7 +250,7 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for
           stop("Please provide wide imputed datasets with a single row per ID.", call. = FALSE)
         }
 
-        fit <- calculate_weights(d, form, weights_method)
+        fit <- calculate_weights(d, form, weights_method, SL.library)
 
         d$weights <- fit$weights
 
@@ -285,7 +305,7 @@ createWeights <- function(home_dir, data, exposure, outcome, tv_confounders, for
 
       # Creating weights
       weights <-  lapply(1, function(i) {
-        calculate_weights(data, form, weights_method)
+        calculate_weights(data, form, weights_method, SL.library)
       })
 
       data$weights <- weights[[1]]$weights

diff --git a/R/fitModel.R b/R/fitModel.R
@@ -9,8 +9,8 @@
 #' @seealso {[survey::svyglm()] for more on family/link specifications, <url1>}
 #' @seealso {[createWeights()], <url1>}
 #' @param home_dir path to home directory
-#' @param data data in wide format as: a data frame, list of imputed
-#'   data frames, or mids object
+#' @param data data in wide format as: a data frame, list of imputed data
+#'   frames, or mids object
 #' @param weights list of IPTW weights output from createWeights()
 #' @param exposure name of exposure variable
 #' @param exposure_time_pts list of integers at which weights will be
@@ -32,7 +32,8 @@
 #' @param epochs (optional) data frame of exposure epoch labels and values
 #' @param verbose (optional) TRUE or FALSE indicator for user output (default is
 #'   TRUE)
-#' @param save.out (optional) TRUE or FALSE indicator to save output and intermediary output locally (default is TRUE)
+#' @param save.out (optional) TRUE or FALSE indicator to save output and
+#'   intermediary output locally (default is TRUE)
 #' @return list of svyglm model output
 #' @export
 #' @examples
@@ -111,7 +112,9 @@
 #'               covariates = "C",
 #'               save.out = FALSE)
 
-fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model, family = gaussian, link = "identity", int_order = NA, covariates = NULL, epochs = NULL, verbose = TRUE, save.out = TRUE) {
+fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outcome, tv_confounders, model,
+                     family = gaussian, link = "identity", int_order = NA, covariates = NULL, epochs = NULL,
+                     verbose = TRUE, save.out = TRUE) {
 
   if (save.out) {
     if (missing(home_dir)) {
@@ -333,8 +336,11 @@ fitModel <- function(home_dir, data, weights, exposure, exposure_time_pts, outco
     }
 
     if (save.out){
+      require(officer) #is there another way to do this? required for writing to word
+      require(flextable) # " "
       suppressWarnings(jtools::export_summs(fits, to.file = "docx", statistics = c(N = "nobs", AIC = "AIC", R2 = "r.squared"),
-                                            file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model, "_table_mod_ev.docx"))))
+                                            file.name = file.path(home_dir, "models", paste0(exposure, "-", outcome, "_", model,
+                                                                                             "_table_mod_ev.docx"))))
     }
 
   }

diff --git a/R/inspectData.R b/R/inspectData.R
@@ -146,58 +146,6 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv
   exposure_type <- ifelse(inherits(data[, paste0(exposure, '.', exposure_time_pts[1])],
                                    "numeric"), "continuous", "binary")
 
-  # Data type
-  cat("The following variables are designated as numeric:", "\n")
-  print(paste(colnames(data)[sapply(data, class) == "numeric"], sep = ",", collapse = ", "))
-
-  cat("The following variables are designated as factors:", "\n")
-  print(paste(colnames(data)[sapply(data, class) == "factor"], sep = ",", collapse = ", "))
-
-  oth <- data.frame(variable = names(sapply(data, class)) [!sapply(data, class) %in% c("numeric", "factor")],
-                    type = sapply(data, class) [!sapply(data, class) %in% c("numeric", "factor")])
-  cat(knitr::kable(oth, caption = "Other variable types",
-                   format = 'pipe'), sep = "\n")
-
-
-  # Exposure summary
-  exposure_summary <- data %>%
-    dplyr:: select(colnames(data)[grepl(exposure, colnames(data))])
-  exposure_summary <- psych::describe(exposure_summary, fast = TRUE)
-
-
-  if (save.out){
-    knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"), format = 'html') %>%
-      kableExtra::kable_styling() %>%
-      kableExtra::save_kable(file = file.path(home_dir, paste0("/", exposure, "_exposure_info.html")))
-    if(verbose){
-      cat(knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"), format = 'pipe'), sep = "\n")
-      cat(paste0(exposure, " exposure descriptive statistics have now been saved in the home directory"), "\n")
-      cat("\n")
-    }
-  }
-
-
-  # Outcome summary
-  outcome_summary <- data %>%
-    dplyr:: select(contains(sapply(strsplit(outcome, "\\."), "[", 1)))
-  outcome_summary <- psych::describe(outcome_summary, fast = TRUE)
-
-  if(save.out){
-    knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ",
-                                                   sapply(strsplit(outcome, "\\."), "[", 1), " Information"), format = 'html') %>%
-      kableExtra::kable_styling() %>%
-      kableExtra::save_kable(file = file.path(home_dir, paste0("/", sapply(strsplit(outcome, "\\."), "[", 1), "_outcome_info.html")))
-
-    if (verbose){
-      cat(knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ",
-                                                         sapply(strsplit(outcome, "\\."), "[", 1), " Information"),
-                       format = 'pipe'), sep = "\n")
-
-      cat(paste0(sapply(strsplit(outcome, "\\."), "[", 1), " outcome descriptive statistics have now been saved in the home directory"), "\n")
-    }
-  }
-
-
 
   # Confounder summary
   potential_covariates <- colnames(data)[!(colnames(data) %in% c(ID))]
@@ -238,7 +186,7 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv
 
   for (l in seq_len(nrow(test))) {
     z = c(sapply(strsplit(all_potential_covariates[grepl(paste0(".", rownames(test)[l]),
-                                                        all_potential_covariates)], "\\."), "[", 1), time_invar_covars)
+                                                         all_potential_covariates)], "\\."), "[", 1), time_invar_covars)
     z = z[!duplicated(z)]
     test[l, z ] <- 1
   }
@@ -255,16 +203,37 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv
 
     if(verbose){
       print(glue::glue("See the home directory for a table and matrix displaying all covariates confounders considered at each exposure time point for {exposure} and {outcome}."), "\n")
-      cat("\n")
 
       #-2 to exclude ID and WAVE
-      print(glue::glue("USER ALERT: Below are the {as.character(length(all_potential_covariates) - 2)} variables spanning {unique_vars - 2} unique domains that will be treated as confounding variables for the relation between {exposure} and {outcome}."), "\n",
-              "Please inspect this list carefully. It should include all time-varying covariates, time invariant covariates, as well as lagged levels of exposure and outcome variables if they were collected at time points earlier than the outcome time point.", "\n")
-      cat("\n")
+      print(glue::glue("USER ALERT: Below are the {as.character(length(all_potential_covariates) - 2)} variables spanning {unique_vars - 2} unique domains that will be treated as confounding variables for the relation between {exposure} and {outcome}."),
+            "Please inspect this list carefully. It should include all time-varying covariates, time invariant covariates, as well as lagged levels of exposure and outcome variables if they were collected at time points earlier than the outcome time point.", "\n")
       print(all_potential_covariates[!(all_potential_covariates %in% c(ID))])
     }
   }
 
+
+  # Data type
+  cat("\n")
+  cat("The following variables are designated as numeric:", "\n")
+  print(paste(colnames(data)[sapply(data, class) == "numeric"], sep = ",", collapse = ", "))
+  cat("\n")
+
+  cat("The following variables are designated as factors:", "\n")
+  print(paste(colnames(data)[sapply(data, class) == "factor"], sep = ",", collapse = ", "))
+  cat("\n")
+
+  oth <- data.frame(variable = names(sapply(data, class)) [!sapply(data, class) %in% c("numeric", "factor")],
+                    type = sapply(data, class) [!sapply(data, class) %in% c("numeric", "factor")])
+  if(nrow(oth) > 0 ){
+    cat(knitr::kable(oth, caption = "Other variable types",
+                     format = 'pipe'), sep = "\n")
+    cat("\n")
+  }
+
+  if(sum(sapply(data, is.character)) > 0){
+    warning(paste0(paste(names(data)[sapply(data, is.character)], sep = ", ", collapse = ", "),
+                   " are of class character.", " The package cannot accept character variables."), call. = FALSE)
+  }
   #covariate correlations
   covariates_to_include <- all_potential_covariates
 
@@ -298,6 +267,26 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv
   }
 
 
+  # Exposure summary
+  exposure_summary <- data %>%
+    dplyr:: select(colnames(data)[grepl(exposure, colnames(data))])
+  exposure_summary <- psych::describe(exposure_summary, fast = TRUE)
+
+
+  if (save.out){
+    knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"),
+                 format = 'html') %>%
+      kableExtra::kable_styling() %>%
+      kableExtra::save_kable(file = file.path(home_dir, paste0("/", exposure, "_exposure_info.html")))
+    if(verbose){
+      cat(knitr::kable(exposure_summary, caption = paste0("Summary of ", exposure, " Exposure Information"),
+                       format = 'pipe'), sep = "\n")
+      cat(paste0(exposure, " exposure descriptive statistics have now been saved in the home directory"), "\n")
+      cat("\n")
+    }
+  }
+
+
   # Exposure history summary
   if( is.null(epochs)){ #making epochs time pts if not specified by user
     epochs <- data.frame(epochs = as.character(time_pts),
@@ -315,4 +304,26 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv
 
   eval_hist(data = data2, exposure, tv_confounders, epochs,
             exposure_time_pts, hi_lo_cut, ref = reference, comps = comparison, verbose)
+
+
+
+  # Outcome summary
+  outcome_summary <- data[, grepl(sapply(strsplit(outcome, "\\."),
+                                         "[", 1), colnames(data))]
+  outcome_summary <- psych::describe(outcome_summary, fast = TRUE)
+
+  if(save.out){
+    knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ",
+                                                   sapply(strsplit(outcome, "\\."), "[", 1), " Information"), format = 'html') %>%
+      kableExtra::kable_styling() %>%
+      kableExtra::save_kable(file = file.path(home_dir, paste0("/", sapply(strsplit(outcome, "\\."), "[", 1), "_outcome_info.html")))
+
+    if (verbose){
+      cat(knitr::kable(outcome_summary, caption = paste0("Summary of Outcome ",
+                                                         sapply(strsplit(outcome, "\\."), "[", 1), " Information"),
+                       format = 'pipe'), sep = "\n")
+
+      cat(paste0(sapply(strsplit(outcome, "\\."), "[", 1), " outcome descriptive statistics have now been saved in the home directory"), "\n")
+    }
+  }
 }