add power_pool() and sample_size_pool()

AngusMcLure · Mar 28, 2024 · a0fce8a · a0fce8a
1 parent 46b9b64
commit a0fce8a
Show file tree

Hide file tree

Showing 6 changed files with 384 additions and 76 deletions.
diff --git a/InProgress/power.R → InProgress/power_dev.R b/InProgress/power.R → InProgress/power_dev.R
@@ -1,17 +1,16 @@
 
-power_fi <- function(n, s, N, prevalence.null, prevalence.alternative,
+power_fi <- function(n, pool_size, pool_number, prevalence.null, prevalence.alternative,
+                     correlation = 0, sensitivity = 1, specificity = 1,
                      sig.level = 0.05, alternative = 'greater',
-                     sensitivity = 1, specificity = 1,
-                     correlation = 0, form = 'beta',
-                     link = 'identity'){
+                     form = 'beta', link = 'identity'){
   thetaa <- prevalence.alternative
   theta0 <- prevalence.null
-
+  
   # The idea here was that the hypothesis test would be for mu rather than theta, with
   # the idea that this would be equivalent to a test on theta, i.e. if mu0 =
   # g(theta0)<g(thetaa) = mua then theta0 < thetaa. However, this is not true
   # since differences in rho/correlations allows for theta0 > thetaa
-
+  
   # if(real.scale & form %in% c('logitnorm', 'cloglognorm')){ g <- function(x){
   # #calculate mu from theta and rho .var <- correlation * x * (1-x)
   # mu_sigma_linknorm(x,.var, link = switch(form, logitnorm = qlogis,
@@ -23,29 +22,33 @@ power_fi <- function(n, s, N, prevalence.null, prevalence.alternative,
               cloglog = cloglog,
               log = log,
               identity = function(x){x})
-
+  
   gdivinv <- switch(link,
                     logit = function(x){x * (1-x)},
                     cloglog = function(x){-log1p(-x) * (1-x)},
                     log = function(x){x},
                     identity = function(x){1})
   # }
-
-  fia <- n/(s*N) * gdivinv(thetaa)^2 /
-    solve(fi_pool_imperfect_cluster(s = s, N = N, p = thetaa,
-                                    sensitivity = sensitivity,
-                                    specificity = specificity,
-                                    correlation = correlation,
-                                    form = form))[1,1]
+
+  fia <- n/(pool_size*pool_number) * gdivinv(thetaa)^2 /
+    solve(fi_pool_cluster(pool_size = pool_size,
+                          pool_number = pool_number,
+                          prevalence = thetaa,
+                          correlation = correlation,
+                          sensitivity = sensitivity,
+                          specificity = specificity,
+                          form = form))[1,1]
   #print(fia)
-  fi0 <- n/(s*N) * gdivinv(theta0)^2 /
-    solve(fi_pool_imperfect_cluster(s = s, N = N, p = theta0,  #should this be theta0 or thetaa?
-                                    sensitivity = sensitivity,
-                                    specificity = specificity,
-                                    correlation = correlation,
-                                    form = form))[1,1]
+  fi0 <- n/(pool_size*pool_number) * gdivinv(theta0)^2 /
+    solve(fi_pool_cluster(pool_size = pool_size,
+                          pool_number = pool_number,
+                          prevalence = theta0,  #should this be theta0 or thetaa?
+                          correlation = correlation,
+                          sensitivity = sensitivity,
+                          specificity = specificity,
+                          form = form))[1,1]
   #print(fi0)
-
+  
   power <- switch(alternative,
                   less = pnorm(((g(theta0) - g(thetaa))  - qnorm(1-sig.level)/sqrt(fi0)) * sqrt(fia)),
                   greater = pnorm(((g(thetaa) - g(theta0))  - qnorm(1-sig.level)/sqrt(fi0)) * sqrt(fia)),
@@ -56,51 +59,55 @@ power_fi <- function(n, s, N, prevalence.null, prevalence.alternative,
   power
 }
 
-sample_size_prevalence <- function(s, N, prevalence.null, prevalence.alternative,
+sample_size_prevalence <- function(pool_size, pool_number, prevalence.null, prevalence.alternative,
                                    power = 0.8, sig.level = 0.05, alternative = 'greater',
                                    sensitivity = 1, specificity = 1,
                                    correlation = 0, form = 'beta',
                                    link = 'identity'){
   thetaa <- prevalence.alternative
   theta0 <- prevalence.null
-
+  
   if(!(alternative %in% c('less', 'greater'))){
     stop('currently only supports one-sided tests. Valid options for alternative are less and greater')
   }
-
+  
   if(alternative == 'less' & theta0 < thetaa){
     stop('If alternative == "less", then prevalence.altnerative must be less than or equal to prevalence.null' )
   }
-
+  
   if(alternative == 'greater' & theta0 > thetaa){
     stop('If alternative == "greater", then prevalence.altnerative must be greater than or equal to prevalence.null' )
   }
-
+  
   g <- switch(link,
               logit = qlogis,
               cloglog = cloglog,
               log = log,
               identity = function(x){x})
-
+  
   gdivinv <- switch(link,
                     logit = function(x){x * (1-x)},
                     cloglog = function(x){-log1p(-x) * (1-x)},
                     log = function(x){x},
                     identity = function(x){1})
-
-  unit_fia <- 1/(s*N) * gdivinv(thetaa)^2 /
-    solve(fi_pool_imperfect_cluster(s = s, N = N, p = thetaa,
-                                    sensitivity = sensitivity,
-                                    specificity = specificity,
-                                    correlation = correlation,
-                                    form = form))[1,1]
-  unit_fi0 <- 1/(s*N) * gdivinv(theta0)^2 /
-    solve(fi_pool_imperfect_cluster(s = s, N = N, p = theta0,
-                                    sensitivity = sensitivity,
-                                    specificity = specificity,
-                                    correlation = correlation,
-                                    form = form))[1,1]
-
+
+  unit_fia <- 1/(pool_size*pool_number) * gdivinv(thetaa)^2 /
+    solve(fi_pool_cluster(pool_size = pool_size,
+                          pool_number = pool_number,
+                          prevalence = thetaa,
+                          correlation = correlation,
+                          sensitivity = sensitivity,
+                          specificity = specificity,
+                          form = form))[1,1]
+  unit_fi0 <- 1/(pool_size*pool_number) * gdivinv(theta0)^2 /
+    solve(fi_pool_cluster(pool_size = pool_size,
+                          pool_number = pool_number,
+                          prevalence = theta0,
+                          correlation = correlation,
+                          sensitivity = sensitivity,
+                          specificity = specificity,
+                          form = form))[1,1]
+
   # Note that the below is correct for either kind of one-sided test, but not for two sided tests
   ((qnorm(power)/sqrt(unit_fia) + qnorm(1 - sig.level)/sqrt(unit_fi0))/(g(theta0) - g(thetaa)))^2
 }
@@ -111,41 +118,41 @@ optimise_s_prevalence_power <- function(prevalence.null, prevalence.alternative,
                                         cost.unit, cost.pool, cost.location = 0,
                                         sig.level = 0.05,  power = 0.8,
                                         alternative = 'less', link = 'logit',
-                                        correlation = 0, N = 1, form = 'beta',
+                                        correlation = 0, pool_number = 1, form = 'beta',
                                         sensitivity = 1, specificity = 1,
-                                        max.s = 200, interval = 0){
-  ss <- function(s){
-    sample_size_prevalence(s, prevalence.null, prevalence.alternative,
+                                        max.pool_size = 200, interval = 0){
+  ss <- function(pool_size){
+    sample_size_prevalence(pool_size, prevalence.null, prevalence.alternative,
                            sig.level,  power,
                            alternative, link,
                            sensitivity, specificity,
                            correlation = correlation,
-                           N = N, form = form)
+                           pool_number = pool_number, form = form)
   }
-
-  cost <- function(s){
-    n <- ss(s)
-    out <- n * (cost.unit + cost.pool/s + cost.location/(s*N))
+  
+  cost <- function(pool_size){
+    n <- ss(pool_size)
+    out <- n * (cost.unit + cost.pool/pool_size + cost.location/(pool_size*pool_number))
     out
   }
-
-  opt <- optimise(cost,c(1,max.s))
-
+  
+  opt <- optimise(cost,c(1,max.pool_size))
+  
   cost_opt_ceiling <- cost(ceiling(opt$minimum))
   cost_opt_floor   <- cost(floor(opt$minimum))
-
+  
   if(cost_opt_ceiling < cost_opt_floor){
     optimal_s <- ceiling(opt$minimum)
     opt_cost <- cost_opt_ceiling
   }else{
     optimal_s <- floor(opt$minimum)
     opt_cost <- cost_opt_floor
   }
-
+  
   optimal_n <- ceiling(ss(optimal_s)/optimal_s) * optimal_s
-
-  if(optimal_s == max.s) warning('Maximum cost effectivness is achieved at or above the maximum size of pools allowed. Consider increasing max.s')
-
+  
+  if(optimal_s == max.pool_size) warning('Maximum cost effectivness is achieved at or above the maximum size of pools allowed. Consider increasing max.pool_size')
+  
   if(interval == 0){
     return(list(optimal_s = optimal_s, optimal_cost = opt_cost, optimal_n = optimal_n))
   }else if(interval > 0){
@@ -155,13 +162,13 @@ optimise_s_prevalence_power <- function(prevalence.null, prevalence.alternative,
     } else{
       lower <- ceiling(uniroot(function(x){cost(x) - max_cost},c(1,optimal_s))$root)
     }
-    if(cost(max.s) < max_cost){
-      upper <- max.s
-      warning('A pool size greater than max.s may fall within the specified range of cost effectiveness. Consider increasing max.s')
+    if(cost(max.pool_size) < max_cost){
+      upper <- max.pool_size
+      warning('A pool size greater than max.pool_size may fall within the specified range of cost effectiveness. Consider increasing max.pool_size')
     }else{
-      upper <- floor(uniroot(function(x){cost(x) - max_cost},c(optimal_s,max.s))$root)
+      upper <- floor(uniroot(function(x){cost(x) - max_cost},c(optimal_s,max.pool_size))$root)
     }
-
+    
     cost_interval <- c(cost(lower),cost(upper))
     n_interval <- c(ss(lower), ss(upper))
     out <- list(optimal_s = optimal_s,
@@ -182,18 +189,18 @@ power_fi(1000,10,0.01,0.005, specificity = 0.999, link = 'identity')
 
 theta.null <- 0.01
 theta.alt  <- 0.005
-s <- 10
-N <- 2
+pool_size <- 10
+pool_number <- 2
 rho <- 0.1
 
 plot(function(x){binom::binom.power(x,alternative = 'less',n = 750,p = theta.null,method = 'asym')}, to = 0.01, n = 100000)
 plot(function(x){binom::binom.power(x,alternative = 'less',n = 750,p = theta.null,method = 'logit')}, to = 0.01, n = 100000,add = TRUE)
 
-de <- design_effect_cluster_fisher(s,N,theta.null,1,1,rho, form = 'beta')
+de <- design_effect_cluster_fisher(pool_size,pool_number,theta.null,1,1,rho, form = 'beta')
 de
 ss.indi.arcsin <- pwr::pwr.p.test(h = pwr::ES.h(theta.alt,theta.null), power = 0.8, sig.level = 0.05, alternative = 'less')
 ss.indi.arcsin
-ss.pool.asympt <- sample_size_prevalence(s,N,theta.null,theta.alt,0.8,sig.level = 0.05,alternative = 'less',correlation = 0.1)
+ss.pool.asympt <- sample_size_prevalence(pool_size,pool_number,theta.null,theta.alt,0.8,sig.level = 0.05,alternative = 'less',correlation = 0.1)
 ss.pool.asympt
 ss.indi.arcsin$n * de
 
@@ -218,8 +225,8 @@ plot(function(x){Vectorize(power_fi)(750,5,prevalence.null = 0.01, prevalence.al
 plot(function(x){Vectorize(power_fi)(750,1,prevalence.null = 0.01, prevalence.alternative = x, link = 'identity', specificity = 1)}, to = 0.01, n = 100000, col = 'blue',add = TRUE)
 plot(function(x){Vectorize(power_fi)(750,1,prevalence.null = 0.01, prevalence.alternative = x, link = 'logit', specificity = 1)}, to = 0.01, n = 100000, col = 'blue', add = TRUE)
 
-
 plot(function(x){Vectorize(power_fi)(750,x,prevalence.null = 0.01, prevalence.alternative = 0.005, link = 'identity',specificity = 0.9)}, from = 1, to = 100, n = 100, ylim = c(0,1))
+
 plot(function(x){Vectorize(power_fi)(750,x,prevalence.null = 0.01, prevalence.alternative = 0.005, link = 'logit',specificity = 0.9)}, from =1, to = 100, n = 100, add= TRUE)
 
 
@@ -235,5 +242,5 @@ plot(function(x){n <- Vectorize(sample_size_prevalence)(x,2,prevalence.null = 0.
 plot(function(x){n <- Vectorize(sample_size_prevalence)(x,2,prevalence.null = 0.01, prevalence.alternative = 0.015, correlation = 0.1, link = 'logit', sensitivity = 1, specificity = 1, alternative = 'greater')}, from = 1, to = 40, n = 20, col = 'red', add = TRUE)
 
 
-Vectorize(optimise_s_prevalence_power)(0.01,seq(0.001,0.009,0.001),1,1.5, specificity = 0.99,correlation = 0, N = 1,alternative = 'less', interval = 0.05, link= 'logit')
+Vectorize(optimise_s_prevalence_power)(0.01,seq(0.001,0.009,0.001),1,1.5, specificity = 0.99,correlation = 0, pool_number = 1,alternative = 'less', interval = 0.05, link= 'logit')
 
diff --git a/NAMESPACE b/NAMESPACE
@@ -14,3 +14,5 @@ export(optimise_s_prevalence)
 export(pois_catch)
 export(pool_max_size)
 export(pool_target_number)
+export(power_pool)
+export(sample_size_pool)
diff --git a/R/fisher_information.R b/R/fisher_information.R
@@ -10,9 +10,9 @@
 #' designs.
 #'
 #' @param pool_size numeric The number of units per pool. Must be a numeric
-#'   value greater than or equal to 0.
-#' @param pool_number numeric The number of pools per cluster. Must be a numeric
-#'   value greater than or equal to 0.
+#'   value or vector of values greater than 0.
+#' @param pool_number numeric The number of pools per cluster. Must be a integer
+#'   value or a vector of integer values greater than or equal to 1.
 #' @param catch_dist An object of class `distribution` (e.g. produced by
 #'   `nb_catch()`) defining the distribution of the possible catch. For
 #'   `fi_pool_random` catch is for the whole survey. For