Skip to content

Commit

Permalink
GIT: Merge pull request #38 from AngusMcLure/power_out
Browse files Browse the repository at this point in the history
Fixes #36
  • Loading branch information
fredjaya authored Jun 19, 2024
2 parents 126e23a + b6821dd commit 7ab5a24
Show file tree
Hide file tree
Showing 11 changed files with 541 additions and 63 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ LazyData: true
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Imports:
glue,
hypergeo,
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

S3method(as.character,pool_strat)
S3method(format,pool_strat)
S3method(print,pool_strat)
S3method(print,power_size_results)
export(design_effect)
export(design_effect_random)
export(detection_errors)
Expand All @@ -18,5 +21,6 @@ export(pool_max_size)
export(pool_target_number)
export(power_pool)
export(power_pool_random)
export(power_size_results)
export(sample_size_pool)
export(sample_size_pool_random)
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ $$

where $\gamma$ is the variance of standard logistic random variable (i.e. $\pi^2/3$). (See Goldstein et al 2002)

The advantage of this version of the ICC, in addition to ease of computation, is independence of $\theta$. Therefore in a mixed effect model, the ICCs are the same for every combination of fixed effects. The major downside is that while their names would suggest that ICCs should approximate $\rho_1$ and $\rho_2$, when prevalence is low (or high) the ICCs are potentially orders of magnitude larger than the latter. Nevertheless, if the 'correlation coefficients' reported in the literature are going to be of the type calculated by STATA, and these are going to be used as a starting point for sample size calculations, then it is worth considering parameterising our Fisher information calculations in terms of $\theta$ and $ICC$.
The advantage of this version of the ICC, in addition to ease of computation, is independence of $\theta$. Therefore in a mixed effect model, the ICCs are the same for every combination of fixed effects. The major downside is that while their names would suggest that ICCs should approximate $\rho_1$ and $\rho_2$, whenever prevalence is close to 0 (or 1) the ICCs are potentially orders of magnitude larger than the latter. Nevertheless, if the 'correlation coefficients' reported in the literature are going to be of the type calculated by STATA, and these are going to be used as a starting point for sample size calculations, then it is worth considering parameterising our Fisher information calculations in terms of $\theta$ and $ICC$.

This can be done fairly straightforwardly as we have in the previous sections with a change of variables.

Expand Down
38 changes: 30 additions & 8 deletions R/pool_strat_methods.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,40 @@
#' Methods for pool_strat obejcts
#' Constructor for pool_strat objects
#'
#' @param x An object of class `pool_strat` (typically produced by one of the
#' built-in functions for generating them such as `pool_max_size()` and
#' `pool_taget_number()`
#' @param ... Ignored
#' @param strat function that takes an integer number of units and returns the
#' number and size of pools to split the units across
#' @param call call used to generate the pool_strat function. Used for format
#' and as.character method (pretty printing)
#' @param description text description of pool strategy to be used in print
#' function
#' @rdname pool_strat_methods
#' @export

pool_strat <- function(strat, call, description){
if(!inherits(strat, 'function')) {stop('strat must be a function')}
ps <- strat
attr(ps, 'call') <- call
attr(ps, 'description') <- description
class(ps) <- c('pool_strat', 'function')
return(ps)
}

#' @export
format.pool_strat <- function(x,...){
return(format(attr(x,'call'),...))
}

#' @export
as.character.pool_strat <- function(x,...){
return(format(attr(x,'call'),...))
}

#' @export
print.pool_strat <- function(x,...){
if(is.null(attr(x, 'description'))){
print.function(x)
}else{
print(paste0('A pooling strategy that ', attr(x, 'description')))
cat(paste0('A pooling strategy that ', attr(x, 'description')))
}
}
invisible(x)
}


18 changes: 10 additions & 8 deletions R/pooling_strategies.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@ pool_max_size <- function(max_size){
return(list(pool_size = c(catch%%max_size, max_size), pool_number = c(1, catch%/%max_size)))
}
}
attr(strat, 'call') <- match.call()
attr(strat, 'description') <- paste('that places units in pools of size', max_size, 'with any remainder placed in a single smaller pool.')
class(strat) <- 'pool_strat'
strat
pool_strat(strat = strat,
call = match.call(),
description = paste('places units in pools of size', max_size, 'and any remainder in a single smaller pool.'))
}

#' @rdname pooling_strategies
Expand All @@ -54,10 +53,13 @@ pool_target_number <- function(target_number){
return(list(pool_size = c(base_size, base_size + 1), pool_number = c(base_number, target_number-base_number)))
}
}
attr(strat, 'call') <- match.call()
attr(strat, 'description') <- paste(if(target_number == 1){'places all units in 1 pool,'}else{paste('aims to distribute units into', target_number, 'equally sized pools,')},'with no maximum pool size')
class(strat) <- 'pool_strat'
strat
pool_strat(strat,
match.call(),
paste(if(target_number == 1){
'places all units in 1 pool,'}
else{
paste('aims to distribute units into', target_number, 'equally sized pools,')
},'with no maximum pool size'))
}


Expand Down
127 changes: 120 additions & 7 deletions R/power.R
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,38 @@ power_pool <- function(pool_size, pool_number, cluster_number,
stats::pnorm(((g(thetaa) - g(theta0)) - stats::qnorm(1-sig_level/2)/sqrt(fi0)) * sqrt(fia)),
stop('invalid alternative. options are less, greater, and two.sided')
)
power

# Prepare output
total_pools <- cluster_number * pool_number
total_units <- total_pools * pool_size
text <- paste(
"A survey design using",
is_perfect_test(sensitivity, specificity),
"diagnostic test on pooled samples with the above parameters has a statistical power of",
round(power, 3)
)

power_size_results(
sensitivity = sensitivity,
specificity = specificity,
# prevalence
prev_null = theta0,
prev_alt = thetaa,
correlation = correlation,
# statistical test
sig_level = sig_level,
power = power,
alternative = alternative,
# sample design
pool_size = pool_size,
pool_number = pool_number,
cluster_number = cluster_number,
total_pools = total_pools,
total_units = total_units,
# parsing
text = text
)

}

#' @rdname power_pool
Expand Down Expand Up @@ -228,7 +259,38 @@ power_pool_random <- function(catch_dist, pool_strat, cluster_number,
stats::pnorm(((g(thetaa) - g(theta0)) - stats::qnorm(1-sig_level/2)/sqrt(fi0)) * sqrt(fia)),
stop('invalid alternative. options are less, greater, and two.sided')
)
power

# Prepare output
if (sensitivity == 1 && specificity == 1) {
perf = "a perfect"
} else {
perf = "an imperfect"
}
text = paste(
"A survey design using",
is_perfect_test(sensitivity, specificity),
"diagnostic test on pooled samples with the above parameters has a statistical power of",
round(power, 3)
)

power_size_results(
sensitivity = sensitivity,
specificity = specificity,
# prevalence
prev_null = theta0,
prev_alt = thetaa,
correlation = correlation,
# statistical test
sig_level = sig_level,
power = power,
alternative = alternative,
# sample design
catch_dist = catch_dist,
pool_strat = as.character(pool_strat),
cluster_number = cluster_number,
# parsing
text = text
)
}

#' @rdname power_pool
Expand Down Expand Up @@ -291,9 +353,34 @@ sample_size_pool <- function(pool_size, pool_number,
total_clusters <- ceiling(total_clusters_raw)
total_pools <- total_clusters * pool_number
total_units <- total_pools * pool_size
text <- paste0(
"A survey design using ", is_perfect_test(sensitivity, specificity),
" diagnostic test on pooled samples with the above parameters requires a total of ",
total_clusters, " clusters, ",
total_pools, " total pools, and ",
total_units, " total units."
)

return(list(clusters = total_clusters, pools = total_pools, units = total_units))

power_size_results(
sensitivity = sensitivity,
specificity = specificity,
# prevalence
prev_null = theta0,
prev_alt = thetaa,
correlation = correlation,
# statistical test
sig_level = sig_level,
power = power,
alternative = alternative,
# sample design
pool_size = pool_size,
pool_number = pool_number,
cluster_number = total_clusters,
total_pools = total_pools,
total_units = total_units,
# parsing
text = text
)
}


Expand Down Expand Up @@ -366,8 +453,34 @@ sample_size_pool_random <- function(catch_dist, pool_strat,
exp_total_pools <- round(ev(\(catch) sum(pool_strat(catch)$pool_number),
catch_dist, max_iter, rel_tol) * total_clusters, 1)

return(list(clusters = total_clusters,
expected_pools = exp_total_pools,
expected_units = exp_total_units))
# Prepare output
text = paste0(
"A survey design using ", is_perfect_test(sensitivity, specificity),
" diagnostic test on pooled samples with the above parameters requires a total of ",
total_clusters, " clusters, ",
exp_total_pools, " expected total pools, and ",
exp_total_units, " expected total units."
)

power_size_results(
sensitivity = sensitivity,
specificity = specificity,
# prevalence
prev_null = theta0,
prev_alt = thetaa,
correlation = correlation,
# statistical test
sig_level = sig_level,
power = power,
alternative = alternative,
# sample design
catch_dist = catch_dist,
pool_strat = as.character(pool_strat),
cluster_number = total_clusters,
exp_total_pools = exp_total_pools,
exp_total_units = exp_total_units,
# parsing
text = text
)

}
Loading

0 comments on commit 7ab5a24

Please sign in to comment.