Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEV: Add sample_design class and re-implement design_effect #42

Merged
merged 8 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
# Generated by roxygen2: do not edit by hand

S3method(as.character,pool_strat)
S3method(design_effect,fixed_design)
S3method(design_effect,variable_design)
S3method(format,pool_strat)
S3method(print,pool_strat)
S3method(print,power_size_results)
export(design_effect)
export(design_effect_random)
export(detection_errors)
export(detection_errors_cluster)
export(fi_pool)
export(fi_pool_cluster)
export(fi_pool_cluster_random)
export(fi_pool_random)
export(fixed_design)
export(nb_catch)
export(optimise_random_prevalence)
export(optimise_sN_prevalence)
Expand All @@ -24,3 +26,4 @@ export(power_pool_random)
export(power_size_results)
export(sample_size_pool)
export(sample_size_pool_random)
export(variable_design)
2 changes: 2 additions & 0 deletions R/check_input.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# NOTE: Will discontinue - use check_input2.R

check_input <- function(argument_name, input_value) {
# Wrapper function to triage arguments, so one function can be used for all
# inputs instead of remembering which one to use.
Expand Down
20 changes: 20 additions & 0 deletions R/check_input2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
check_geq2 <- function(val, min) {
name <- deparse(substitute(val)) # get name of variable
if (!is.numeric(val)) {
stop(glue::glue("{name} must be numeric, not {class(val)}."))
}
if (val < min) {
stop(glue::glue("{name} must be >= {min}."))
}
}

check_in_range2 <- function(val) {
name <- deparse(substitute(val)) # get name of variable
if(!is.numeric(val)) {
stop(glue::glue("{name} must be numeric, not {class(val)}."))
}
if (val < 0 | val > 1) {
message(glue::glue("{name} must be a numeric value between 0 and 1, inclusive."))
stop(glue::glue("{name} = {val}"))
}
}
93 changes: 35 additions & 58 deletions R/design_effect.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,11 @@
#' to be multiplied by a factor of D to achieve the same degree of precision in
#' estimating prevalence as a simple random survey with individual tests. The
#' functions support cluster and simple random sampling with perfect or
#' imperfect tests, and either fixed sample sizes (`design_effect()`) or random
#' sample sizes (`design_effect_random()`).
#' imperfect tests, and either fixed sample sizes
#' (`design_effect(fixed_design, ...)`) or variable sample sizes
#' (`design_effect(variable_design, ...)`).
#'
#' @param pool_size numeric The number of units per pool. Must be a numeric
#' value greater than or equal to 0.
#' @param pool_number numeric The number of pools per cluster. Must be a numeric
#' value greater than or equal to 0.
#' @param catch_dist An object of class `distribution` (e.g. produced by
#' `nb_catch()`) defining the distribution of the possible catch. If
#' `correlation = 0` the catch is for the whole survey. For `correlation > 0`
#' the catch is per cluster (i.e. cluster size).
#' @param pool_strat function Defines a rule for how a number of units will be
#' divided into pools. Must take a single numeric argument and return a named
#' list of pool sizes and pool numbers. `pool_max_size()` and
#' `pool_target_number` provide convenience functions for defining common
#' pooling strategies.
#' @param x a sample_design object
#' @param prevalence numeric The proportion of units that carry the marker of
#' interest (i.e. true positive). Must be be a numeric value between 0 and 1,
#' inclusive of both.
Expand All @@ -35,63 +24,51 @@
#' (there are no differences units within a single cluster). A value of 0
#' indicates that units within clusters are no more correlated than units in
#' different clusters.
#' @param sensitivity numeric The probability that the test correctly identifies
#' a true positive. Must be a numeric value between 0 and 1, inclusive of
#' both. A value of 1 indicates that the test can perfectly identify all true
#' positives.
#' @param specificity numeric The probability that the test correctly identifies
#' a true negative. Must be a numeric value between 0 and 1, inclusive of
#' both. A value of 1 indicates that the test can perfectly identify all true
#' negatives.
#' @param form string The distribution used to model the cluster-level
#' prevalence and correlation of units within cluster. Select one of "beta",
#' "logitnorm" or "cloglognorm". See details.
#' "logitnorm" or "cloglognorm".
#'
#' @return A numeric value of the design effect `D`.
#' @export
#'
#'
#' @examples
#' design_effect(
#' pool_size = 5, pool_number = 10, prevalence = 0.01,
#' correlation = 0.05, sensitivity = 0.99, specificity = 0.95
#' )
design_effect <- function(pool_size,
pool_number,
prevalence,
correlation,
sensitivity,
specificity,
form = "beta"){

check_input("pool_size", pool_size)
check_input("pool_number", pool_number)
check_input("prevalence", prevalence)
check_input("correlation", correlation)
check_input("sensitivity", sensitivity)
check_input("specificity", specificity)
check_input("form", form)
#' design_effect(fixed_design(10, 2), prevalence = 0.01, correlation = 0.05)
#'
#' vd <- variable_design(nb_catch(10, 13), pool_target_number(20))
#' design_effect(vd, prevalence = 0.01, correlation = 0.05)
design_effect <- function(x, prevalence, correlation, form) {
check_in_range2(prevalence)
check_in_range2(correlation)
# No input check for form as done in downstream functions/methods
UseMethod("design_effect")
}

pool_number * pool_size * fi_pool(pool_size = 1, prevalence, sensitivity, specificity) *
#' @rdname design_effect
#' @method design_effect fixed_design
#' @export
design_effect.fixed_design <- function(x,
prevalence,
correlation,
form = "beta") {

x$pool_number * x$pool_size * fi_pool(pool_size = 1, prevalence, x$sensitivity, x$specificity) *
solve(fi_pool_cluster(
pool_size, pool_number, prevalence,
correlation, sensitivity, specificity, form)
x$pool_size, x$pool_number, prevalence,
correlation, x$sensitivity, x$specificity, form)
)[1, 1]
}


#' @rdname design_effect
#' @method design_effect variable_design
#' @export
design_effect_random <- function(catch_dist,
pool_strat,
prevalence,
correlation,
sensitivity,
specificity,
form = "beta") {
design_effect.variable_design <- function(x,
prevalence,
correlation,
form = "beta") {

mean(catch_dist) * fi_pool(pool_size = 1, prevalence, sensitivity, specificity) *
mean(x$catch_dist) * fi_pool(pool_size = 1, prevalence, x$sensitivity, x$specificity) *
solve(fi_pool_cluster_random(
catch_dist, pool_strat, prevalence,
correlation, sensitivity, specificity, form)
x$catch_dist, x$pool_strat, prevalence,
correlation, x$sensitivity, x$specificity, form)
)[1, 1]
}
103 changes: 103 additions & 0 deletions R/sample_design.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#' S3 sample_design constructors
#'
#' Stores parameters related to the sampling design. Aims to reduce having to
#' input each param separately across functions (e.g. power/optimise). Can
#' either be of class `fixed_design` or `variable_design`.
#'
#' @param pool_size numeric/NULL The number of units per pool. Must be a numeric
#' value greater than 0. `fixed_design` only.
#' @param pool_number numeric/NULL The number of pools per cluster. Numeric
#' inputs must be an integer greater than or equal to 1. `fixed_design` only.
#' @param catch_dist An object of class `distribution` (e.g. produced by
#' `nb_catch()`) defining the distribution of the possible catch. If
#' `correlation = 0` the catch is for the whole survey. For `correlation > 0`
#' the catch is per cluster (i.e. cluster size). `variable_design` only.
#' @param pool_strat function Defines a rule for how a number of units will be
#' divided into pools. Must take a single numeric argument and return a named
#' list of pool sizes and pool numbers. `pool_max_size()` and
#' `pool_target_number` provide convenience functions for defining common
#' pooling strategies. `variable_design` only.
#' @param sensitivity numeric The probability that the test correctly identifies
#' a true positive. Must be a numeric value between 0 and 1, inclusive of
#' both. A value of 1 indicates that the test can perfectly identify all true
#' positives.
#' @param specificity numeric The probability that the test correctly identifies
#' a true negative. Must be a numeric value between 0 and 1, inclusive of
#' both. A value of 1 indicates that the test can perfectly identify all true
#' negatives.
#'
#' @return An object of class \code{sample_design}
#' @export
#'
#' @examples
#' fd_perfect <- fixed_design(pool_size = 10)
#'
#' fd_imperfect <- fixed_design(
#' pool_size = 10, pool_number = NULL, sensitivity = 0.95, specificity = 0.99
#' )
#'
#' vd_target <- variable_design(
#' catch_dist = nb_catch(10, 11),
#' pool_strat = pool_target_number(20)
#' )
#'
#' vd_max <- variable_design(
#' catch_dist = nb_catch(10, 11),
#' pool_strat = pool_max_size(20)
#' )
#'
#' vd_max_imperfect <- variable_design(
#' catch_dist = nb_catch(10, 11),
#' pool_strat = pool_max_size(20),
#' sensitivity = 0.95,
#' specificity = 0.98
#' )
fixed_design <- function(pool_size = NULL,
pool_number = NULL,
sensitivity = 1,
specificity = 1) {

# allow NULLs for optimise functions to identify which
# variable should be optimised
if (!is.null(pool_size)) {
check_geq2(pool_size, 0)
}
if (!is.null(pool_number)) {
check_geq2(pool_number, 0)
}
# sens and spec cannot be NULL
check_in_range2(sensitivity)
check_in_range2(specificity)

structure(
list(
pool_size = pool_size,
pool_number = pool_number,
sensitivity = sensitivity,
specificity = specificity
),
class = c("fixed_design", "sample_design")
)
}

#' @rdname fixed_design
#' @export
variable_design <- function(catch_dist,
pool_strat,
sensitivity = 1,
specificity = 1) {

# sens and spec cannot be NULL
check_in_range2(sensitivity)
check_in_range2(specificity)

structure(
list(
catch_dist = catch_dist,
pool_strat = pool_strat,
sensitivity = sensitivity,
specificity = specificity
),
class = c("variable_design", "sample_design")
)
}
67 changes: 15 additions & 52 deletions man/design_effect.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading