GIT: Merge pull request #38 from AngusMcLure/power_out

Fixes #36
AngusMcLure · Jun 19, 2024 · 7ab5a24 · 7ab5a24
2 parents 126e23a + b6821dd
commit 7ab5a24
Show file tree

Hide file tree

Showing 11 changed files with 541 additions and 63 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,7 +16,7 @@ LazyData: true
 Suggests: 
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Imports: 
     glue,
     hypergeo,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(as.character,pool_strat)
+S3method(format,pool_strat)
 S3method(print,pool_strat)
+S3method(print,power_size_results)
 export(design_effect)
 export(design_effect_random)
 export(detection_errors)
@@ -18,5 +21,6 @@ export(pool_max_size)
 export(pool_target_number)
 export(power_pool)
 export(power_pool_random)
+export(power_size_results)
 export(sample_size_pool)
 export(sample_size_pool_random)
diff --git a/Paper/Designing pool-tested surveys for detection estimation of prevalence.md b/Paper/Designing pool-tested surveys for detection estimation of prevalence.md
@@ -808,7 +808,7 @@ $$
 
 where $\gamma$ is the variance of standard logistic random variable (i.e. $\pi^2/3$). (See Goldstein et al 2002)
 
-The advantage of this version of the ICC, in addition to ease of computation, is independence of $\theta$. Therefore in a mixed effect model, the ICCs are the same for every combination of fixed effects. The major downside is that while their names would suggest that ICCs should approximate $\rho_1$ and $\rho_2$, when prevalence is low (or high) the ICCs are potentially orders of magnitude larger than the latter. Nevertheless, if the 'correlation coefficients' reported in the literature are going to be of the type calculated by STATA, and these are going to be used as a starting point for sample size calculations, then it is worth considering parameterising our Fisher information calculations in terms of $\theta$ and $ICC$.
+The advantage of this version of the ICC, in addition to ease of computation, is independence of $\theta$. Therefore in a mixed effect model, the ICCs are the same for every combination of fixed effects. The major downside is that while their names would suggest that ICCs should approximate $\rho_1$ and $\rho_2$, whenever prevalence is close to 0 (or 1) the ICCs are potentially orders of magnitude larger than the latter. Nevertheless, if the 'correlation coefficients' reported in the literature are going to be of the type calculated by STATA, and these are going to be used as a starting point for sample size calculations, then it is worth considering parameterising our Fisher information calculations in terms of $\theta$ and $ICC$.
 
 This can be done fairly straightforwardly as we have in the previous sections with a change of variables.
 

diff --git a/R/pool_strat_methods.R b/R/pool_strat_methods.R
@@ -1,18 +1,40 @@
-#' Methods for pool_strat obejcts
+#' Constructor for pool_strat objects
 #'
-#' @param x An object of class `pool_strat` (typically produced by one of the
-#'   built-in functions for generating them such as `pool_max_size()` and
-#'   `pool_taget_number()`
-#' @param ... Ignored
+#' @param strat function that takes an integer number of units and returns the
+#'   number and size of pools to split the units across
+#' @param call call used to generate the pool_strat function. Used for format
+#'   and as.character method (pretty printing)
+#' @param description text description of pool strategy to be used in print
+#'   function
 #' @rdname pool_strat_methods
-#' @export
 
+pool_strat <- function(strat, call, description){
+  if(!inherits(strat, 'function')) {stop('strat must be a function')}
+  ps <- strat
+  attr(ps, 'call') <- call
+  attr(ps, 'description') <- description
+  class(ps) <- c('pool_strat', 'function')
+  return(ps)
+}
+
+#' @export
+format.pool_strat <- function(x,...){
+  return(format(attr(x,'call'),...))
+}
 
+#' @export
+as.character.pool_strat <- function(x,...){
+  return(format(attr(x,'call'),...))
+}
 
+#' @export
 print.pool_strat <- function(x,...){
   if(is.null(attr(x, 'description'))){
     print.function(x)
   }else{
-    print(paste0('A pooling strategy that ', attr(x, 'description')))
+    cat(paste0('A pooling strategy that ', attr(x, 'description')))
   }
-}
+  invisible(x)
+}
+
+
diff --git a/R/pooling_strategies.R b/R/pooling_strategies.R
@@ -33,10 +33,9 @@ pool_max_size <- function(max_size){
       return(list(pool_size = c(catch%%max_size, max_size), pool_number = c(1, catch%/%max_size)))
     }
   }
-  attr(strat, 'call') <- match.call()
-  attr(strat, 'description') <- paste('that places units in pools of size', max_size, 'with any remainder placed in a single smaller pool.')
-  class(strat) <- 'pool_strat'
-  strat
+  pool_strat(strat = strat,
+             call = match.call(),
+             description = paste('places units in pools of size', max_size, 'and any remainder in a single smaller pool.'))
 }
 
 #' @rdname pooling_strategies
@@ -54,10 +53,13 @@ pool_target_number <- function(target_number){
       return(list(pool_size = c(base_size, base_size + 1), pool_number = c(base_number, target_number-base_number)))
     }
   }
-  attr(strat, 'call') <- match.call()
-  attr(strat, 'description') <- paste(if(target_number == 1){'places all units in 1 pool,'}else{paste('aims to distribute units into', target_number, 'equally sized pools,')},'with no maximum pool size')
-  class(strat) <- 'pool_strat'
-  strat
+  pool_strat(strat,
+             match.call(),
+             paste(if(target_number == 1){
+               'places all units in 1 pool,'}
+               else{
+                 paste('aims to distribute units into', target_number, 'equally sized pools,')
+                 },'with no maximum pool size'))
 }
 
 

diff --git a/R/power.R b/R/power.R
@@ -153,7 +153,38 @@ power_pool <- function(pool_size, pool_number, cluster_number,
                     stats::pnorm(((g(thetaa) - g(theta0))  - stats::qnorm(1-sig_level/2)/sqrt(fi0)) * sqrt(fia)),
                   stop('invalid alternative. options are less, greater, and two.sided')
   )
-  power
+
+  # Prepare output
+  total_pools <- cluster_number * pool_number
+  total_units <- total_pools * pool_size
+  text <- paste(
+    "A survey design using", 
+    is_perfect_test(sensitivity, specificity), 
+    "diagnostic test on pooled samples with the above parameters has a statistical power of", 
+    round(power, 3)
+  )
+
+  power_size_results(  
+    sensitivity = sensitivity,
+    specificity = specificity,
+    # prevalence
+    prev_null = theta0,
+    prev_alt = thetaa,
+    correlation = correlation,
+    # statistical test
+    sig_level = sig_level,
+    power = power,
+    alternative = alternative,
+    # sample design
+    pool_size = pool_size,
+    pool_number = pool_number,
+    cluster_number = cluster_number,
+    total_pools = total_pools,
+    total_units = total_units,
+    # parsing
+    text = text
+  )
+
 }
 
 #' @rdname power_pool
@@ -228,7 +259,38 @@ power_pool_random <- function(catch_dist, pool_strat, cluster_number,
                     stats::pnorm(((g(thetaa) - g(theta0))  - stats::qnorm(1-sig_level/2)/sqrt(fi0)) * sqrt(fia)),
                   stop('invalid alternative. options are less, greater, and two.sided')
   )
-  power
+
+  # Prepare output
+  if (sensitivity == 1 && specificity == 1) {
+    perf = "a perfect"
+  } else {
+    perf = "an imperfect"
+  }
+  text = paste(
+    "A survey design using", 
+    is_perfect_test(sensitivity, specificity), 
+    "diagnostic test on pooled samples with the above parameters has a statistical power of",
+    round(power, 3)
+  )
+
+  power_size_results(  
+    sensitivity = sensitivity,
+    specificity = specificity,
+    # prevalence
+    prev_null = theta0,
+    prev_alt = thetaa,
+    correlation = correlation,
+    # statistical test
+    sig_level = sig_level,
+    power = power,
+    alternative = alternative,
+    # sample design
+    catch_dist = catch_dist,
+    pool_strat = as.character(pool_strat),
+    cluster_number = cluster_number,
+    # parsing
+    text = text
+  )
 }
 
 #' @rdname power_pool
@@ -291,9 +353,34 @@ sample_size_pool <- function(pool_size, pool_number,
   total_clusters <- ceiling(total_clusters_raw)
   total_pools <- total_clusters * pool_number
   total_units <- total_pools * pool_size
+  text <- paste0(
+    "A survey design using ", is_perfect_test(sensitivity, specificity), 
+    " diagnostic test on pooled samples with the above parameters requires a total of ",
+    total_clusters, " clusters, ", 
+    total_pools, " total pools, and ", 
+    total_units, " total units."
+  )
 
-  return(list(clusters = total_clusters, pools = total_pools, units = total_units))
-
+  power_size_results(  
+    sensitivity = sensitivity,
+    specificity = specificity,
+    # prevalence
+    prev_null = theta0,
+    prev_alt = thetaa,
+    correlation = correlation,
+    # statistical test
+    sig_level = sig_level,
+    power = power,
+    alternative = alternative,
+    # sample design
+    pool_size = pool_size,
+    pool_number = pool_number,
+    cluster_number = total_clusters,
+    total_pools = total_pools,
+    total_units = total_units,
+    # parsing
+    text = text
+  )
 }
 
 
@@ -366,8 +453,34 @@ sample_size_pool_random <- function(catch_dist, pool_strat,
   exp_total_pools <- round(ev(\(catch) sum(pool_strat(catch)$pool_number),
                               catch_dist, max_iter, rel_tol) * total_clusters, 1)
 
-  return(list(clusters = total_clusters,
-              expected_pools = exp_total_pools,
-              expected_units = exp_total_units))
+  # Prepare output
+  text = paste0(
+    "A survey design using ", is_perfect_test(sensitivity, specificity), 
+    " diagnostic test on pooled samples with the above parameters requires a total of ",
+    total_clusters, " clusters, ", 
+    exp_total_pools, " expected total pools, and ", 
+    exp_total_units, " expected total units."
+  )
+
+  power_size_results(  
+    sensitivity = sensitivity,
+    specificity = specificity,
+    # prevalence
+    prev_null = theta0,
+    prev_alt = thetaa,
+    correlation = correlation,
+    # statistical test
+    sig_level = sig_level,
+    power = power,
+    alternative = alternative,
+    # sample design
+    catch_dist = catch_dist,
+    pool_strat = as.character(pool_strat),
+    cluster_number = total_clusters,
+    exp_total_pools = exp_total_pools,
+    exp_total_units = exp_total_units,
+    # parsing
+    text = text
+  )
 
 }