updates to documentation; add NEWS.md

HRDAG · Feb 2, 2024 · 11dd81a
1 parent 94b22cd
commit 11dd81a
Show file tree

Hide file tree

Showing 8 changed files with 141 additions and 114 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,3 @@
+# verdata 0.9.1
+
+* Submitted to CRAN
diff --git a/R/combine_replicates.R b/R/combine_replicates.R
@@ -31,25 +31,28 @@
 proportions_imputed <- function(complete_data,
                                 strata_vars,
                                 digits = 2) {
-    
+
     if (!is.data.frame(complete_data)) {
         stop("This argument must be a data.frame")
     }
-    
+
     if (digits < 0) {stop("Cannot round to negative decimal places")}
-  
+
     proportions_data <- complete_data %>%
-        dplyr::mutate(imp_lo_p = round(imp_lo / sum(imp_mean, na.rm = TRUE), digits = digits),
-                      imp_mean_p = round(imp_mean / sum(imp_mean, na.rm = TRUE), digits = digits),
-                      imp_hi_p = round(imp_hi / sum(imp_mean, na.rm = TRUE), digits = digits))
-
+        dplyr::mutate(imp_lo_p = round(imp_lo / sum(imp_mean, na.rm = TRUE),
+                                       digits = digits),
+                      imp_mean_p = round(imp_mean / sum(imp_mean, na.rm = TRUE),
+                                         digits = digits),
+                      imp_hi_p = round(imp_hi / sum(imp_mean, na.rm = TRUE),
+                                       digits = digits))
+
     proportions_data <- proportions_data %>%
         dplyr::select(all_of({{strata_vars}}),
                       imp_lo, imp_mean, imp_hi,
                       imp_lo_p, imp_mean_p, imp_hi_p)
-    
+
     return(proportions_data)
-    
+
 }
 
 #' Combine replicates according to the Normal approximation using the laws of total expectation and variance.
@@ -77,6 +80,7 @@ proportions_imputed <- function(complete_data,
 #' @importFrom dplyr %>%
 #'
 #' @examples
+#' \dontrun{
 #' local_dir <- system.file("extdata", "right", package = "verdata")
 #' replicates_data <- read_replicates(local_dir, "reclutamiento", c(1, 2))
 #' replicates_obs_data <- summary_observed("reclutamiento", replicates_data,
@@ -86,6 +90,7 @@ proportions_imputed <- function(complete_data,
 #' replicates_data, strata_vars = 'sexo', conflict_filter = TRUE,
 #' forced_dis_filter = FALSE, edad_minors_filter = FALSE, include_props = FALSE,
 #' digits = 2)
+#' }
 combine_replicates <- function(violation,
                                replicates_obs_data,
                                replicates_data,
@@ -95,111 +100,111 @@ combine_replicates <- function(violation,
                                edad_minors_filter = FALSE,
                                include_props = FALSE,
                                digits = 2) {
-    
+
     if (!(violation %in% c("homicidio", "secuestro", "reclutamiento", "desaparicion"))) {
-        
+
         stop("Violation argument incorrectly specified. Please put any of the following
          violations (in quotes and in lower case): homicidio, secuestro,
          reclutamiento or desaparicion")
     }
-    
+
     if (!is.data.frame(replicates_obs_data)) {
         stop("The argument 'replicates_obs_data' must be a data frame")
     }
-    
+
     if (!is.data.frame(replicates_data)) {
         stop("The argument 'replicates_data' must be a data frame")
     }
-    
+
     if (!is.null(strata_vars)) {
-        
+
         strata_vars_missing <- setdiff(strata_vars, names(replicates_data))
-        
+
         if (length(strata_vars_missing) > 0) {
             stop("This variable is not found in the replicates. Please check if
            it exists or if it has another name.")
         }
     }
-    
+
     if (forced_dis_filter == TRUE & violation != "desaparicion") {
         stop("This argument only applies to 'desaparicion'. Please change the
          TRUE option to FALSE")
     }
-    
+
     num_replicates <- dplyr::n_distinct(replicates_data$replica)
-    
+
     if (num_replicates == 1) {
-        
+
         stop("Results cannot be calculated using only 1 replicate. For more
            consistent results please work with more replicates.")
-        
+
     }
-    
+
     else {
-        
+
         logger::log_info("You are working with {num_replicates} replicates according to filter")
-        
+
     }
-    
+
     if (digits < 0) {stop("Cannot round to negative decimal places")}
-    
+
     if (conflict_filter == TRUE) {
-        
+
         logger::log_info("Analyzing victims related to armed conflict")
-        
+
         prep_data <- replicates_data %>%
             dplyr::mutate(is_conflict = as.integer(is_conflict)) %>%
             dplyr::filter(is_conflict == 1)
-        
+
     } else {
-        
+
         logger::log_info("You are working with all victims (related and not related to is_conflict)")
-        
+
         prep_data <- replicates_data %>%
             dplyr::mutate(is_conflict = as.integer(is_conflict))
     }
-    
-    
+
+
     if (edad_minors_filter == TRUE) {
-        
+
         logger::log_info("Analyzing victims under 18 years of age")
-        
+
         prep_data <- prep_data %>%
             dplyr::filter(edad_jep == "INFANCIA" |
                               edad_jep == "ADOLESCENCIA")
-        
+
     } else {
-        
+
         logger::log_info("Analyzing victims of all ages")
         prep_data <- prep_data
-        
+
     }
-    
+
     if (violation == "desaparicion" & forced_dis_filter == TRUE) {
-        
+
         logger::log_info("Analyzing the documented victims who were victims of forced disappearance")
-        
+
         prep_data <- prep_data %>%
             dplyr::mutate(is_forced_dis = as.integer(is_forced_dis)) %>%
             dplyr::filter(is_forced_dis == 1)
-        
+
     } else {
-        
+
         logger::log_info("Not filtering in is_forced_dis")
-        
+
     }
-    
+
     prep_data <- prep_data  %>%
         dplyr::mutate(dplyr::across(all_of({{strata_vars}}), as.character)) %>%
         dplyr::group_by(replica, dplyr::across(all_of({{strata_vars}}))) %>%
         dplyr::summarise(Freq = dplyr::n()) %>%
         dplyr::ungroup()
-    
+
     theta <- prep_data %>%
         dplyr::group_by(dplyr::across(all_of({{strata_vars}}))) %>%
         dplyr::summarize(theta = round(mean(Freq), 0)) %>%
         dplyr::ungroup()
-    
+
     rep_data <- prep_data %>%
         dplyr::left_join(theta) %>%
         dplyr::mutate(vb1 = (Freq - theta)^2) %>%
@@ -214,39 +219,41 @@ combine_replicates <- function(violation,
         dplyr::mutate(upper_ci = round(theta + (1.96 * se_b), 0)) %>%
         dplyr::select(all_of({{strata_vars}}), lower_ci, theta, upper_ci) %>%
         dplyr::rename(imp_mean = theta, imp_lo = lower_ci, imp_hi = upper_ci)
-    
+
     if (include_props == TRUE) {
-        
+
         logger::log_info("Including the proportions")
-        
+
         rep_data <- proportions_imputed(rep_data, strata_vars, digits = digits)
-        
+
         rep_data <- rep_data %>%
             dplyr::mutate(imp_lo_p = dplyr::if_else(imp_lo_p < 0, 0, imp_lo_p))
-        
+
     } else {
-        
+
         logger::log_info("Don't include the proportions")
-        
+
     }
-    
+
     final_data <- rep_data %>%
         dplyr::mutate(dplyr::across(all_of(strata_vars), as.character))
-    
+
     replicates_obs_data <- replicates_obs_data %>%
         dplyr::mutate(dplyr::across(all_of(strata_vars), as.character))
-
-    final_data <- dplyr::full_join(rep_data, replicates_obs_data, by = {{strata_vars}}) %>%
+
+    final_data <- dplyr::full_join(rep_data,
+                                   replicates_obs_data,
+                                   by = {{strata_vars}}) %>%
         dplyr::mutate(imp_lo = dplyr::if_else(imp_lo < observed,
                                               observed, imp_lo))
-    
+
     final_data <- final_data %>%
         dplyr::select(all_of({{strata_vars}}), observed,
                       dplyr::everything()) %>%
         dplyr::arrange(dplyr::desc(imp_mean))
-    
+
     return(final_data)
-    
+
 }
 
 

diff --git a/R/estimate_mse.R b/R/estimate_mse.R
@@ -356,7 +356,8 @@ mse <- function(stratum_data, stratum_name,
 
         return(tibble::tibble_row(validated = FALSE,
                                   N = NA_real_,
-                                  valid_sources = paste(valid_sources, collapse = ","),
+                                  valid_sources = paste(valid_sources,
+                                                        collapse = ","),
                                   n_obs = NA_real_,
                                   stratum_name = stratum_name))
 
@@ -400,7 +401,8 @@ mse <- function(stratum_data, stratum_name,
 
             estimates <- lookup_results %>%
                 dplyr::mutate(validated = TRUE,
-                              valid_sources = paste(names(stratum_data_prepped), collapse = ","),
+                              valid_sources = paste(names(stratum_data_prepped),
+                                                    collapse = ","),
                               n_obs = n_obs,
                               stratum_name = stratum_name) %>%
                 dplyr::select(validated, N, valid_sources, n_obs, stratum_name)