From af794a5805aee4a6403a04b6bd7fe1001b9bd511 Mon Sep 17 00:00:00 2001 From: Isa Stallworthy <31548151+istallworthy@users.noreply.github.com> Date: Fri, 15 Sep 2023 16:57:56 -0400 Subject: [PATCH] data type checking --- R/inspectData.R | 14 +++++++++++--- examplePipelineRevised.Rmd | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/R/inspectData.R b/R/inspectData.R index 64e2f74d..3f5055c7 100644 --- a/R/inspectData.R +++ b/R/inspectData.R @@ -137,7 +137,7 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv } - if(!inherits(data, data.frame)){ + if(!inherits(data, "data.frame")){ warning(paste0("Your data is a ", class(data), ". Convert to data frame before running devMSMs."), call. = FALSE) } @@ -147,8 +147,16 @@ inspectData <- function(data, home_dir, exposure, exposure_time_pts, outcome, tv "numeric"), "continuous", "binary") # Data type - cat("Please inspect the following table of data types to ensure they are correct for each variable:") - print(str(data)) + cat("The following variables are designated as numeric:", "\n") + print(paste(colnames(data)[sapply(data, class) == "numeric"], sep = ",", collapse = ", ")) + + cat("The following variables are designated as factors:", "\n") + print(paste(colnames(data)[sapply(data, class) == "factor"], sep = ",", collapse = ", ")) + + oth <- data.frame(variable = names(sapply(data, class)) [!sapply(data, class) %in% c("numeric", "factor")], + type = sapply(data, class) [!sapply(data, class) %in% c("numeric", "factor")]) + cat(knitr::kable(oth, caption = "Other variable types", + format = 'pipe'), sep = "\n") # Exposure summary diff --git a/examplePipelineRevised.Rmd b/examplePipelineRevised.Rmd index ce40978b..a441f01d 100644 --- a/examplePipelineRevised.Rmd +++ b/examplePipelineRevised.Rmd @@ -88,7 +88,7 @@ data <- read.csv("/Users/isabella/Library/CloudStorage/Box-Box/BSL General/MSMs/ data <- as.data.frame(data) library(dplyr) -data_df <- data_mids[[1]] +data_df <- complete(data_mids, 1) data_df <- data_df %>% dplyr::select(-c(contains(c(":", "Childhood", "Infancy", "Toddlerhood", "pcx")))) data <- data_df #single df