Skip to content

Commit

Permalink
updates to documentation; add NEWS.md
Browse files Browse the repository at this point in the history
  • Loading branch information
thegargiulian committed Feb 2, 2024
1 parent 94b22cd commit 11dd81a
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 114 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# verdata 0.9.1

* Submitted to CRAN
129 changes: 68 additions & 61 deletions R/combine_replicates.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,28 @@
proportions_imputed <- function(complete_data,
                                strata_vars,
                                digits = 2) {
  # Append proportion columns (imp_lo_p, imp_mean_p, imp_hi_p) to a table of
  # combined estimates. Each proportion is the matching imp_* column divided
  # by the total of imp_mean over all rows, rounded to `digits` decimals.
  #
  # complete_data: data frame holding the columns imp_lo, imp_mean and imp_hi
  #   together with the columns named in `strata_vars`.
  # strata_vars: column names (handed to all_of()) placed in front of the
  #   estimates in the result.
  # digits: number of decimal places used for the proportions; a negative
  #   value is rejected.
  #
  # Returns the strata columns followed by imp_lo, imp_mean, imp_hi and the
  # three rounded proportion columns; any other column is dropped.

  if (!is.data.frame(complete_data)) {
    stop("This argument must be a data.frame")
  }

  if (digits < 0) {
    stop("Cannot round to negative decimal places")
  }

  # All three proportions share the same denominator, so compute it once.
  # NA values of imp_mean are ignored when building the total.
  total_mean <- sum(complete_data$imp_mean, na.rm = TRUE)

  complete_data %>%
    dplyr::mutate(imp_lo_p = round(imp_lo / total_mean, digits = digits),
                  imp_mean_p = round(imp_mean / total_mean, digits = digits),
                  imp_hi_p = round(imp_hi / total_mean, digits = digits)) %>%
    dplyr::select(dplyr::all_of(strata_vars),
                  imp_lo, imp_mean, imp_hi,
                  imp_lo_p, imp_mean_p, imp_hi_p)

}

#' Combine replicates according to the Normal approximation using the laws of total expectation and variance.
Expand Down Expand Up @@ -77,6 +80,7 @@ proportions_imputed <- function(complete_data,
#' @importFrom dplyr %>%
#'
#' @examples
#' \dontrun{
#' local_dir <- system.file("extdata", "right", package = "verdata")
#' replicates_data <- read_replicates(local_dir, "reclutamiento", c(1, 2))
#' replicates_obs_data <- summary_observed("reclutamiento", replicates_data,
Expand All @@ -86,6 +90,7 @@ proportions_imputed <- function(complete_data,
#' replicates_data, strata_vars = 'sexo', conflict_filter = TRUE,
#' forced_dis_filter = FALSE, edad_minors_filter = FALSE, include_props = FALSE,
#' digits = 2)
#' }
combine_replicates <- function(violation,
replicates_obs_data,
replicates_data,
Expand All @@ -95,111 +100,111 @@ combine_replicates <- function(violation,
edad_minors_filter = FALSE,
include_props = FALSE,
digits = 2) {

if (!(violation %in% c("homicidio", "secuestro", "reclutamiento", "desaparicion"))) {

stop("Violation argument incorrectly specified. Please put any of the following
violations (in quotes and in lower case): homicidio, secuestro,
reclutamiento or desaparicion")
}

if (!is.data.frame(replicates_obs_data)) {
stop("The argument 'replicates_obs_data' must be a data frame")
}

if (!is.data.frame(replicates_data)) {
stop("The argument 'replicates_data' must be a data frame")
}

if (!is.null(strata_vars)) {

strata_vars_missing <- setdiff(strata_vars, names(replicates_data))

if (length(strata_vars_missing) > 0) {
stop("This variable is not found in the replicates. Please check if
it exists or if it has another name.")
}
}

if (forced_dis_filter == TRUE & violation != "desaparicion") {
stop("This argument only applies to 'desaparicion'. Please change the
TRUE option to FALSE")
}

num_replicates <- dplyr::n_distinct(replicates_data$replica)

if (num_replicates == 1) {

stop("Results cannot be calculated using only 1 replicate. For more
consistent results please work with more replicates.")

}

else {

logger::log_info("You are working with {num_replicates} replicates according to filter")

}

if (digits < 0) {stop("Cannot round to negative decimal places")}

if (conflict_filter == TRUE) {

logger::log_info("Analyzing victims related to armed conflict")

prep_data <- replicates_data %>%
dplyr::mutate(is_conflict = as.integer(is_conflict)) %>%
dplyr::filter(is_conflict == 1)

} else {

logger::log_info("You are working with all victims (related and not related to is_conflict)")

prep_data <- replicates_data %>%
dplyr::mutate(is_conflict = as.integer(is_conflict))
}


if (edad_minors_filter == TRUE) {

logger::log_info("Analyzing victims under 18 years of age")

prep_data <- prep_data %>%
dplyr::filter(edad_jep == "INFANCIA" |
edad_jep == "ADOLESCENCIA")

} else {

logger::log_info("Analyzing victims of all ages")
prep_data <- prep_data

}

if (violation == "desaparicion" & forced_dis_filter == TRUE) {

logger::log_info("Analyzing the documented victims who were victims of forced disappearance")

prep_data <- prep_data %>%
dplyr::mutate(is_forced_dis = as.integer(is_forced_dis)) %>%
dplyr::filter(is_forced_dis == 1)

} else {

logger::log_info("Not filtering in is_forced_dis")

}

prep_data <- prep_data %>%
dplyr::mutate(dplyr::across(all_of({{strata_vars}}), as.character)) %>%
dplyr::group_by(replica, dplyr::across(all_of({{strata_vars}}))) %>%
dplyr::summarise(Freq = dplyr::n()) %>%
dplyr::ungroup()

theta <- prep_data %>%
dplyr::group_by(dplyr::across(all_of({{strata_vars}}))) %>%
dplyr::summarize(theta = round(mean(Freq), 0)) %>%
dplyr::ungroup()

rep_data <- prep_data %>%
dplyr::left_join(theta) %>%
dplyr::mutate(vb1 = (Freq - theta)^2) %>%
Expand All @@ -214,39 +219,41 @@ combine_replicates <- function(violation,
dplyr::mutate(upper_ci = round(theta + (1.96 * se_b), 0)) %>%
dplyr::select(all_of({{strata_vars}}), lower_ci, theta, upper_ci) %>%
dplyr::rename(imp_mean = theta, imp_lo = lower_ci, imp_hi = upper_ci)

if (include_props == TRUE) {

logger::log_info("Including the proportions")

rep_data <- proportions_imputed(rep_data, strata_vars, digits = digits)

rep_data <- rep_data %>%
dplyr::mutate(imp_lo_p = dplyr::if_else(imp_lo_p < 0, 0, imp_lo_p))

} else {

logger::log_info("Don't include the proportions")

}

final_data <- rep_data %>%
dplyr::mutate(dplyr::across(all_of(strata_vars), as.character))

replicates_obs_data <- replicates_obs_data %>%
dplyr::mutate(dplyr::across(all_of(strata_vars), as.character))

final_data <- dplyr::full_join(rep_data, replicates_obs_data, by = {{strata_vars}}) %>%

final_data <- dplyr::full_join(rep_data,
replicates_obs_data,
by = {{strata_vars}}) %>%
dplyr::mutate(imp_lo = dplyr::if_else(imp_lo < observed,
observed, imp_lo))

final_data <- final_data %>%
dplyr::select(all_of({{strata_vars}}), observed,
dplyr::everything()) %>%
dplyr::arrange(dplyr::desc(imp_mean))

return(final_data)

}


Expand Down
6 changes: 4 additions & 2 deletions R/estimate_mse.R
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,8 @@ mse <- function(stratum_data, stratum_name,

return(tibble::tibble_row(validated = FALSE,
N = NA_real_,
valid_sources = paste(valid_sources, collapse = ","),
valid_sources = paste(valid_sources,
collapse = ","),
n_obs = NA_real_,
stratum_name = stratum_name))

Expand Down Expand Up @@ -400,7 +401,8 @@ mse <- function(stratum_data, stratum_name,

estimates <- lookup_results %>%
dplyr::mutate(validated = TRUE,
valid_sources = paste(names(stratum_data_prepped), collapse = ","),
valid_sources = paste(names(stratum_data_prepped),
collapse = ","),
n_obs = n_obs,
stratum_name = stratum_name) %>%
dplyr::select(validated, N, valid_sources, n_obs, stratum_name)
Expand Down
Loading

0 comments on commit 11dd81a

Please sign in to comment.