imp mod

istallworthy committed Sep 15, 2023
1 parent 20d82b6 commit b172e6c
Showing 3 changed files with 34 additions and 23 deletions.
43 changes: 25 additions & 18 deletions R/imputeData.R
@@ -24,11 +24,12 @@
#' @param outcome name of outcome variable with ".timepoint" suffix
#' @param tv_confounders list of time-varying confounders with ".timepoint" suffix
#' @param ti_confounders list of time invariant confounders
#' @param para_proc (optional) TRUE/FALSE indicator for whether to use parallel processing across multiple cores to speed up imputation (default = TRUE)
#' @param read_imps_from_file (optional) "yes" or "no" indicator to read in imputations that have been previously run and saved locally (default is "no")
#' @return mice object of m imputed datasets


imputeData <- function(data, m = 5, method = "rf", home_dir, exposure, outcome, tv_confounders, ti_confounders, read_imps_from_file = "no") {
imputeData <- function(data, m = 5, method = "rf", home_dir, exposure, outcome, tv_confounders, ti_confounders, para_proc = TRUE, read_imps_from_file = "no") {

if (missing(home_dir)){
stop("Please supply a home directory.", call. = FALSE)
@@ -96,28 +97,34 @@ imputeData <- function(data, m = 5, method = "rf", home_dir, exposure, outcome,
imp_method <- method
time_varying_covariates <- tv_confounders
time_invar <- ti_confounders
data_to_impute <- data
data_to_impute <- tibble::as_tibble(data_to_impute)

cat(glue::glue("Creating {m} imputed datasets using the {imp_method} imputation method in mice. This may take some time to run."))
cat("\n")

# Configure parallelization
nCores <- min(parallel::detectCores(), 8)
options(mc.cores = nCores)
options(cores = nCores)
doParallel::registerDoParallel(cores = nCores)
cat("### Using", foreach::getDoParWorkers(), "cores\n")
cat("### Using", foreach::getDoParName(), "as the backend\n")

data_to_impute <- data
data_to_impute <- tibble::as_tibble(data_to_impute)

# Conducts imputations using parallelized execution cycling through m
imputed_datasets <- foreach::foreach(i = seq_len(m), .combine = mice::ibind) %dorng% {
cat("### Started iteration", i, "\n")
miceout <- mice::mice(data_to_impute, m = 1, method = imp_method, maxit = 1,
if (para_proc){
# Configure parallelization
nCores <- min(parallel::detectCores(), 8)
options(mc.cores = nCores)
options(cores = nCores)
doParallel::registerDoParallel(cores = nCores)
cat("### Using", foreach::getDoParWorkers(), "cores\n")
cat("### Using", foreach::getDoParName(), "as the backend\n")

# Conducts imputations using parallelized execution cycling through m
imputed_datasets <- foreach::foreach(i = seq_len(m), .combine = mice::ibind) %dorng% {
cat("### Started iteration", i, "\n")
miceout <- mice::mice(data_to_impute, m = 1, method = imp_method, maxit = 0,
print = F)
cat("### Completed iteration", i, "\n")
miceout
}
}
else{
imputed_datasets <- mice::mice(data_to_impute, m = m, method = imp_method, maxit = 0,
print = F)
}

saveRDS(imputed_datasets, glue::glue("{home_dir}/imputations/{exposure}-{outcome}_all_imp.rds"))
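For reference, the parallel branch above follows the common foreach + doRNG pattern for reproducible multiple imputation with mice: each worker runs one single-imputation chain and the results are combined with mice::ibind. A minimal self-contained sketch of that pattern (not this package's code; it uses mice's built-in nhanes toy data, two cores, and illustrative method/maxit values):

```r
library(mice)       # imputation + ibind() to combine mids objects
library(doParallel) # parallel backend for foreach
library(doRNG)      # %dorng%: parallel-safe, seed-reproducible RNG streams

registerDoParallel(cores = 2)
set.seed(1234) # %dorng% makes the parallel draws reproducible from this seed

m <- 4
# Each worker runs one single-imputation chain; ibind() stacks the
# resulting mids objects pairwise into one object holding all m imputations
imps <- foreach(i = seq_len(m), .combine = ibind) %dorng% {
  mice(nhanes, m = 1, method = "pmm", maxit = 5, print = FALSE)
}
imps$m # 4
```

Note that %dorng% comes from the doRNG package, which gives each worker an independent, reproducible RNG stream; plain %dopar% would not make the imputations reproducible across runs.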
11 changes: 6 additions & 5 deletions examplePipelineRevised.Rmd
@@ -116,18 +116,19 @@ data_wide <- stats::reshape(data = data_long,
direction = "wide")
data_wide <- data_wide[,colSums(is.na(data_wide)) < nrow(data_wide)]
data_wide <- imputed_data[[1]]
#### P5 Optional Imputation
#impute wide data to account for missingness
data_wide = data_wide[, !colnames(data_wide) %in% c("RHasSO.6","RHasSO.15", "RHasSO.24")]
m <- 1
method <- "pmm"; #pmm, midastouch, sample, cart , rf (default)
imputed_data <- imputeData(data_wide, m, method, home_dir, exposure, outcome, tv_confounders, ti_confounders,
read_imps_from_file = "no")
para_proc = FALSE, read_imps_from_file = "no")
data = complete(imputed_data, 1)
#read in imputed csv files to list
@@ -262,7 +263,7 @@ short_formulas <- createFormulas(home_dir, exposure, exposure_time_pts, outcome,
```

#### 2b/2d. Create weights with all weighting methods
#### 2b. Create weights with all weighting methods
```{r}
## Create balancing weights with simplified formulas
Expand Down Expand Up @@ -300,7 +301,7 @@ weights.super <- createWeights(home_dir, data, exposure, outcome, tv_confounders
method, read_in_from_file = "no", verbose = TRUE, save.out = TRUE)
```

#### 2c/2d. Assess all weighting methods
#### 2c. Assess all weighting methods
```{r}
#optional balance threshold specification
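The truncated chunk tail above ("#read in imputed csv files to list") points at assembling previously saved imputations from disk. A hedged sketch of that step, assuming one csv per imputed dataset under home_dir/imputations (the folder and naming pattern are illustrative assumptions, not the package's documented convention):

```r
# Sketch: read previously saved imputed datasets (one csv per imputation)
# into a list. Folder and file pattern are assumptions for illustration.
imp_files <- list.files(file.path(home_dir, "imputations"),
                        pattern = "\\.csv$", full.names = TRUE)
imputed_list <- lapply(imp_files, read.csv)
```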
3 changes: 3 additions & 0 deletions man/imputeData.Rd

