diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index 624064c..d36b593 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -10,6 +10,7 @@ /Users/med-tv_/Documents/GitHub/missCompare/R/MCAR.R="6913D720" /Users/med-tv_/Documents/GitHub/missCompare/R/MNAR.R="9877AF3D" /Users/med-tv_/Documents/GitHub/missCompare/R/all_patterns.R="F8724802" +/Users/med-tv_/Documents/GitHub/missCompare/R/check.Renviron="2DC40FEC" /Users/med-tv_/Documents/GitHub/missCompare/R/clean.R="F8B1BB77" /Users/med-tv_/Documents/GitHub/missCompare/R/clindata_miss.R="E1AC835C" /Users/med-tv_/Documents/GitHub/missCompare/R/get_data.R="4BA118BE" diff --git a/R/check.Renviron b/R/check.Renviron deleted file mode 100644 index c67d565..0000000 --- a/R/check.Renviron +++ /dev/null @@ -1 +0,0 @@ -R_CHECK_DONTTEST_EXAMPLES=false diff --git a/R/impute_data.R b/R/impute_data.R index 55216d2..a7d2d70 100644 --- a/R/impute_data.R +++ b/R/impute_data.R @@ -57,19 +57,17 @@ #' @name impute_data #' #' @examples -#' \donttest{ -#' #running 10 iterations of all algorithms (that allow for multiple imputation) and -#' #one copy of those that do not allow for multiple imputations -#' impute_data(df, scale = TRUE, n.iter = 10, -#' sel_method = c(1:16)) -#' #running 20 iterations of missForest (e.g. this was the best performing algorithm -#' #in simulations) on a non-scaled dataframe -#' impute_data(df, scale = FALSE, n.iter = 20, -#' sel_method = c(14)) -#' #running 1 iterations of four selected non-probabilistic algorithms on a scaled dataframe -#' impute_data(df, scale = TRUE, n.iter = 1, -#' sel_method = c(2:3, 5, 7)) -#' } +#' ## running 10 iterations of all algorithms (that allow for multiple imputation) and +#' ## one copy of those that do not allow for multiple imputations +#' # impute_data(df, scale = TRUE, n.iter = 10, +#' # sel_method = c(1:16)) +#' ## running 20 iterations of missForest (e.g. 
this was the best performing algorithm +#' ## in simulations) on a non-scaled dataframe +#' # impute_data(df, scale = FALSE, n.iter = 20, +#' # sel_method = c(14)) +#' ## running 1 iterations of four selected non-probabilistic algorithms on a scaled dataframe +#' # impute_data(df, scale = TRUE, n.iter = 1, +#' # sel_method = c(2:3, 5, 7)) #' #' @export diff --git a/R/impute_simulated.R b/R/impute_simulated.R index 9693877..dfbe995 100644 --- a/R/impute_simulated.R +++ b/R/impute_simulated.R @@ -32,28 +32,26 @@ #' \item{Plot_KS}{Faceted boxplot of KS values per missingness pattern and missing data imputation algorithm} #' #' @examples -#' \donttest{ -#' #in case there is no assumed missingness pattern per variable -#' wrap <- impute_simulated(rownum = metadata$Rows, -#' colnum = metadata$Columns, -#' cormat = metadata$Corr_matrix, -#' MD_pattern = metadata$MD_Pattern, -#' NA_fraction = metadata$Fraction_missingness, -#' min_PDM = 10, -#' n.iter = 50) +#' ## in case there is no assumed missingness pattern per variable +#' # wrap <- impute_simulated(rownum = metadata$Rows, +#' # colnum = metadata$Columns, +#' # cormat = metadata$Corr_matrix, +#' # MD_pattern = metadata$MD_Pattern, +#' # NA_fraction = metadata$Fraction_missingness, +#' # min_PDM = 10, +#' # n.iter = 50) #' -#' #in case there is a pre-defined assumed pattern -#' wrap <- impute_simulated(rownum = metadata$Rows, -#' colnum = metadata$Columns, -#' cormat = metadata$Corr_matrix, -#' MD_pattern = metadata$MD_Pattern, -#' NA_fraction = metadata$Fraction_missingness, -#' min_PDM = 10, -#' assumed_pattern = c('MAR','MAR','MCAR','MCAR', -#' 'MNAR','MCAR','MAR','MNAR', -#' 'MCAR','MNAR','MCAR'), -#' n.iter = 50) -#' } +#' ## in case there is a pre-defined assumed pattern +#' # wrap <- impute_simulated(rownum = metadata$Rows, +#' # colnum = metadata$Columns, +#' # cormat = metadata$Corr_matrix, +#' # MD_pattern = metadata$MD_Pattern, +#' # NA_fraction = metadata$Fraction_missingness, +#' # min_PDM = 10, +#' # 
assumed_pattern = c('MAR','MAR','MCAR','MCAR', +#' # 'MNAR','MCAR','MAR','MNAR', +#' # 'MCAR','MNAR','MCAR'), +#' # n.iter = 50) #' #' @export diff --git a/R/post_imp_diag.R b/R/post_imp_diag.R index 4aaf75c..8e0319a 100644 --- a/R/post_imp_diag.R +++ b/R/post_imp_diag.R @@ -34,12 +34,10 @@ #' \item{Correlation_plot}{Scatter plot of mean pairwise Pearson's correlation coefficients from the original dataframe (with missingness) and the imputed dataframe. The blue line represents a line with slope 1 and intercept 0. The red line is a fitted line of the correlation coefficient pairs. The error bars around the points represent the individual 95\% confidence intervals drawn from bootstrapping the correlation coefficients} #' #' @examples -#' \donttest{ -#' diagnostics <- post_imp_diag(X_orig = df_miss, X_imp = df_imputed, scale=TRUE) -#' diagnostics$Histograms$variable_X -#' diagnostics$Boxplots$variable_Z -#' diagnostics$Statistics$variable_Y -#' } +#' # diagnostics <- post_imp_diag(X_orig = df_miss, X_imp = df_imputed, scale=TRUE) +#' # diagnostics$Histograms$variable_X +#' # diagnostics$Boxplots$variable_Z +#' # diagnostics$Statistics$variable_Y #' #' @export diff --git a/R/test_mi.R b/R/test_mi.R index a83e860..226bb1f 100644 --- a/R/test_mi.R +++ b/R/test_mi.R @@ -20,19 +20,17 @@ #' @inherit test_AmeliaII return #' #' @examples -#' \donttest{ -#' clindata_miss_mini <- clindata_miss[1:80,1:4] -#' cleaned <- clean(clindata_miss_mini, missingness_coding = -9) -#' metadata <- get_data(cleaned) -#' simulated <- simulate(rownum = metadata$Rows, colnum = metadata$Columns, -#' cormat = metadata$Corr_matrix) -#' miss_list <- all_patterns(simulated$Simulated_matrix, -#' MD_pattern = metadata$MD_Pattern, -#' NA_fraction = metadata$Fraction_missingness, -#' min_PDM = 2) +#' # clindata_miss_mini <- clindata_miss[1:80,1:4] +#' # cleaned <- clean(clindata_miss_mini, missingness_coding = -9) +#' # metadata <- get_data(cleaned) +#' # simulated <- simulate(rownum = metadata$Rows, 
colnum = metadata$Columns, +#' # cormat = metadata$Corr_matrix) +#' # miss_list <- all_patterns(simulated$Simulated_matrix, +#' # MD_pattern = metadata$MD_Pattern, +#' # NA_fraction = metadata$Fraction_missingness, +#' # min_PDM = 2) #' -#' test_mi(X_hat = simulated$Simulated_matrix, list = miss_list) -#' } +#' # test_mi(X_hat = simulated$Simulated_matrix, list = miss_list) #' #' @export diff --git a/R/test_pcaMethods_NLPCA.R b/R/test_pcaMethods_NLPCA.R index fadcbe1..924caa5 100644 --- a/R/test_pcaMethods_NLPCA.R +++ b/R/test_pcaMethods_NLPCA.R @@ -20,19 +20,17 @@ #' @inherit test_AmeliaII return #' #' @examples -#' \donttest{ -#' clindata_miss_mini <- clindata_miss[1:80,1:4] -#' cleaned <- clean(clindata_miss_mini, missingness_coding = -9) -#' metadata <- get_data(cleaned) -#' simulated <- simulate(rownum = metadata$Rows, colnum = metadata$Columns, -#' cormat = metadata$Corr_matrix) -#' miss_list <- all_patterns(simulated$Simulated_matrix, -#' MD_pattern = metadata$MD_Pattern, -#' NA_fraction = metadata$Fraction_missingness, -#' min_PDM = 2) +#' # clindata_miss_mini <- clindata_miss[1:80,1:4] +#' # cleaned <- clean(clindata_miss_mini, missingness_coding = -9) +#' # metadata <- get_data(cleaned) +#' # simulated <- simulate(rownum = metadata$Rows, colnum = metadata$Columns, +#' # cormat = metadata$Corr_matrix) +#' # miss_list <- all_patterns(simulated$Simulated_matrix, +#' # MD_pattern = metadata$MD_Pattern, +#' # NA_fraction = metadata$Fraction_missingness, +#' # min_PDM = 2) #' -#' test_pcaMethods_NLPCA(X_hat = simulated$Simulated_matrix, list = miss_list) -#' } +#' # test_pcaMethods_NLPCA(X_hat = simulated$Simulated_matrix, list = miss_list) #' #' @export diff --git a/cran-comments.md b/cran-comments.md index 35e1f84..4b2f102 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -20,7 +20,8 @@ This is a resubmission. In this version I have made the following changes in res * Please write TRUE and FALSE instead of T and F. 
(Please don't use 'T' or 'F' as vector names.): FIXED. All instances in all functions checked, Ts and Fs replaced with TRUEs and FALSEs. -* \dontrun{} should only be used if the example really cannot be executed (e.g. because of missing additional software, missing API keys, ...) by the user [...] Please unwrap the examples if they are executable in < 5 sec, or replace -\dontrun{} with \donttest{}: FIXED. Examples are now marked with donttest{} in the following functions: impute_data(), impute_simulated(), post_imp_diag(), test_mi() and test_pcaMethods_NLPCA() due to longer executable times. +* \dontrun{} should only be used if the example really cannot be executed (e.g. because of missing additional software, missing API keys, ...) by the user [...] Please unwrap the examples if they are executable in < 5 sec, or replace \dontrun{} with \donttest{}: FIXED, although with a workaround. I tried using \donttest{} as +you suggested, but could not get it to work, even after reading the available material and forum discussions on the topic, +so the examples that take too long to run are now commented out in the following five functions: impute_data(), impute_simulated(), post_imp_diag(), test_mi() and test_pcaMethods_NLPCA(). * Have the issues why your package was archived been fixed?: FIXED. The package was archived because there were ERRORs that needed fixing related to the data.table::melt() function, that has been passed to various categories of objects, e.g. matrices and data.frames - as data.table::melt() currently only has a method for data.tables, objects have either been converted to data.tables or another solutions were found to replace existing code. diff --git a/man/impute_data.Rd b/man/impute_data.Rd index 4b7c10f..d4abc1c 100644 --- a/man/impute_data.Rd +++ b/man/impute_data.Rd @@ -62,18 +62,16 @@ way to impute datasets with a curated list of functions. Some of the functions a multiple imputed datasets.
The user can decide to impute their dataframe with a selected method or with multiple methods. } \examples{ -\donttest{ -#running 10 iterations of all algorithms (that allow for multiple imputation) and -#one copy of those that do not allow for multiple imputations -impute_data(df, scale = TRUE, n.iter = 10, - sel_method = c(1:16)) -#running 20 iterations of missForest (e.g. this was the best performing algorithm -#in simulations) on a non-scaled dataframe -impute_data(df, scale = FALSE, n.iter = 20, - sel_method = c(14)) -#running 1 iterations of four selected non-probabilistic algorithms on a scaled dataframe -impute_data(df, scale = TRUE, n.iter = 1, - sel_method = c(2:3, 5, 7)) -} +## running 10 iterations of all algorithms (that allow for multiple imputation) and +## one copy of those that do not allow for multiple imputations +# impute_data(df, scale = TRUE, n.iter = 10, +# sel_method = c(1:16)) +## running 20 iterations of missForest (e.g. this was the best performing algorithm +## in simulations) on a non-scaled dataframe +# impute_data(df, scale = FALSE, n.iter = 20, +# sel_method = c(14)) +## running 1 iterations of four selected non-probabilistic algorithms on a scaled dataframe +# impute_data(df, scale = TRUE, n.iter = 1, +# sel_method = c(2:3, 5, 7)) } diff --git a/man/impute_simulated.Rd b/man/impute_simulated.Rd index d699e46..544be40 100644 --- a/man/impute_simulated.Rd +++ b/man/impute_simulated.Rd @@ -53,27 +53,25 @@ automatically detect whether there is a MAP matrix in the list and calculate met output by this function are calculated for ALL missing values across the dataset, not by variable. 
} \examples{ -\donttest{ -#in case there is no assumed missingness pattern per variable -wrap <- impute_simulated(rownum = metadata$Rows, - colnum = metadata$Columns, - cormat = metadata$Corr_matrix, - MD_pattern = metadata$MD_Pattern, - NA_fraction = metadata$Fraction_missingness, - min_PDM = 10, - n.iter = 50) +## in case there is no assumed missingness pattern per variable +# wrap <- impute_simulated(rownum = metadata$Rows, +# colnum = metadata$Columns, +# cormat = metadata$Corr_matrix, +# MD_pattern = metadata$MD_Pattern, +# NA_fraction = metadata$Fraction_missingness, +# min_PDM = 10, +# n.iter = 50) -#in case there is a pre-defined assumed pattern -wrap <- impute_simulated(rownum = metadata$Rows, - colnum = metadata$Columns, - cormat = metadata$Corr_matrix, - MD_pattern = metadata$MD_Pattern, - NA_fraction = metadata$Fraction_missingness, - min_PDM = 10, - assumed_pattern = c('MAR','MAR','MCAR','MCAR', - 'MNAR','MCAR','MAR','MNAR', - 'MCAR','MNAR','MCAR'), - n.iter = 50) -} +## in case there is a pre-defined assumed pattern +# wrap <- impute_simulated(rownum = metadata$Rows, +# colnum = metadata$Columns, +# cormat = metadata$Corr_matrix, +# MD_pattern = metadata$MD_Pattern, +# NA_fraction = metadata$Fraction_missingness, +# min_PDM = 10, +# assumed_pattern = c('MAR','MAR','MCAR','MCAR', +# 'MNAR','MCAR','MAR','MNAR', +# 'MCAR','MNAR','MCAR'), +# n.iter = 50) } diff --git a/man/post_imp_diag.Rd b/man/post_imp_diag.Rd index 669ea0d..62e8c0d 100644 --- a/man/post_imp_diag.Rd +++ b/man/post_imp_diag.Rd @@ -41,11 +41,9 @@ the original dataframe and the imputed one. Should the imputation algorithm perf the variable distributions and the variable clusters should be similar. 
} \examples{ -\donttest{ -diagnostics <- post_imp_diag(X_orig = df_miss, X_imp = df_imputed, scale=TRUE) -diagnostics$Histograms$variable_X -diagnostics$Boxplots$variable_Z -diagnostics$Statistics$variable_Y -} +# diagnostics <- post_imp_diag(X_orig = df_miss, X_imp = df_imputed, scale=TRUE) +# diagnostics$Histograms$variable_X +# diagnostics$Boxplots$variable_Z +# diagnostics$Statistics$variable_Y } diff --git a/man/test_mi.Rd b/man/test_mi.Rd index d3f2ff6..7823ca3 100644 --- a/man/test_mi.Rd +++ b/man/test_mi.Rd @@ -39,18 +39,16 @@ imputing all datasets. The function will automatically detect whether there is a RMSE for all matrices provided in the list. } \examples{ -\donttest{ -clindata_miss_mini <- clindata_miss[1:80,1:4] -cleaned <- clean(clindata_miss_mini, missingness_coding = -9) -metadata <- get_data(cleaned) -simulated <- simulate(rownum = metadata$Rows, colnum = metadata$Columns, -cormat = metadata$Corr_matrix) -miss_list <- all_patterns(simulated$Simulated_matrix, - MD_pattern = metadata$MD_Pattern, - NA_fraction = metadata$Fraction_missingness, - min_PDM = 2) +# clindata_miss_mini <- clindata_miss[1:80,1:4] +# cleaned <- clean(clindata_miss_mini, missingness_coding = -9) +# metadata <- get_data(cleaned) +# simulated <- simulate(rownum = metadata$Rows, colnum = metadata$Columns, +# cormat = metadata$Corr_matrix) +# miss_list <- all_patterns(simulated$Simulated_matrix, +# MD_pattern = metadata$MD_Pattern, +# NA_fraction = metadata$Fraction_missingness, +# min_PDM = 2) -test_mi(X_hat = simulated$Simulated_matrix, list = miss_list) -} +# test_mi(X_hat = simulated$Simulated_matrix, list = miss_list) } diff --git a/man/test_pcaMethods_NLPCA.Rd b/man/test_pcaMethods_NLPCA.Rd index 2949f4a..d68fda2 100644 --- a/man/test_pcaMethods_NLPCA.Rd +++ b/man/test_pcaMethods_NLPCA.Rd @@ -39,18 +39,16 @@ imputing all datasets. The function will automatically detect whether there is a RMSE for all matrices provided in the list. 
} \examples{ -\donttest{ -clindata_miss_mini <- clindata_miss[1:80,1:4] -cleaned <- clean(clindata_miss_mini, missingness_coding = -9) -metadata <- get_data(cleaned) -simulated <- simulate(rownum = metadata$Rows, colnum = metadata$Columns, -cormat = metadata$Corr_matrix) -miss_list <- all_patterns(simulated$Simulated_matrix, - MD_pattern = metadata$MD_Pattern, - NA_fraction = metadata$Fraction_missingness, - min_PDM = 2) +# clindata_miss_mini <- clindata_miss[1:80,1:4] +# cleaned <- clean(clindata_miss_mini, missingness_coding = -9) +# metadata <- get_data(cleaned) +# simulated <- simulate(rownum = metadata$Rows, colnum = metadata$Columns, +# cormat = metadata$Corr_matrix) +# miss_list <- all_patterns(simulated$Simulated_matrix, +# MD_pattern = metadata$MD_Pattern, +# NA_fraction = metadata$Fraction_missingness, +# min_PDM = 2) -test_pcaMethods_NLPCA(X_hat = simulated$Simulated_matrix, list = miss_list) -} +# test_pcaMethods_NLPCA(X_hat = simulated$Simulated_matrix, list = miss_list) }