Skip to content

Commit

Permalink
add power_pool() and sample_size_pool()
Browse files Browse the repository at this point in the history
  • Loading branch information
AngusMcLure committed Mar 28, 2024
1 parent 46b9b64 commit a0fce8a
Show file tree
Hide file tree
Showing 6 changed files with 384 additions and 76 deletions.
145 changes: 76 additions & 69 deletions InProgress/power.R → InProgress/power_dev.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@

power_fi <- function(n, s, N, prevalence.null, prevalence.alternative,
power_fi <- function(n, pool_size, pool_number, prevalence.null, prevalence.alternative,
correlation = 0, sensitivity = 1, specificity = 1,
sig.level = 0.05, alternative = 'greater',
sensitivity = 1, specificity = 1,
correlation = 0, form = 'beta',
link = 'identity'){
form = 'beta', link = 'identity'){
thetaa <- prevalence.alternative
theta0 <- prevalence.null

# The idea here was that the hypothesis test would be for mu rather than theta, with
# the idea that this would be equivalent to a test on theta, i.e. if mu0 =
# g(theta0)<g(thetaa) = mua then theta0 < thetaa. However, this is not true
# since differences in rho/correlations allows for theta0 > thetaa

# if(real.scale & form %in% c('logitnorm', 'cloglognorm')){ g <- function(x){
# #calculate mu from theta and rho .var <- correlation * x * (1-x)
# mu_sigma_linknorm(x,.var, link = switch(form, logitnorm = qlogis,
Expand All @@ -23,29 +22,33 @@ power_fi <- function(n, s, N, prevalence.null, prevalence.alternative,
cloglog = cloglog,
log = log,
identity = function(x){x})

gdivinv <- switch(link,
logit = function(x){x * (1-x)},
cloglog = function(x){-log1p(-x) * (1-x)},
log = function(x){x},
identity = function(x){1})
# }

fia <- n/(s*N) * gdivinv(thetaa)^2 /
solve(fi_pool_imperfect_cluster(s = s, N = N, p = thetaa,
sensitivity = sensitivity,
specificity = specificity,
correlation = correlation,
form = form))[1,1]

fia <- n/(pool_size*pool_number) * gdivinv(thetaa)^2 /
solve(fi_pool_cluster(pool_size = pool_size,
pool_number = pool_number,
prevalence = thetaa,
correlation = correlation,
sensitivity = sensitivity,
specificity = specificity,
form = form))[1,1]
#print(fia)
fi0 <- n/(s*N) * gdivinv(theta0)^2 /
solve(fi_pool_imperfect_cluster(s = s, N = N, p = theta0, #should this be theta0 or thetaa?
sensitivity = sensitivity,
specificity = specificity,
correlation = correlation,
form = form))[1,1]
fi0 <- n/(pool_size*pool_number) * gdivinv(theta0)^2 /
solve(fi_pool_cluster(pool_size = pool_size,
pool_number = pool_number,
prevalence = theta0, #should this be theta0 or thetaa?
correlation = correlation,
sensitivity = sensitivity,
specificity = specificity,
form = form))[1,1]
#print(fi0)

power <- switch(alternative,
less = pnorm(((g(theta0) - g(thetaa)) - qnorm(1-sig.level)/sqrt(fi0)) * sqrt(fia)),
greater = pnorm(((g(thetaa) - g(theta0)) - qnorm(1-sig.level)/sqrt(fi0)) * sqrt(fia)),
Expand All @@ -56,51 +59,55 @@ power_fi <- function(n, s, N, prevalence.null, prevalence.alternative,
power
}

sample_size_prevalence <- function(s, N, prevalence.null, prevalence.alternative,
sample_size_prevalence <- function(pool_size, pool_number, prevalence.null, prevalence.alternative,
power = 0.8, sig.level = 0.05, alternative = 'greater',
sensitivity = 1, specificity = 1,
correlation = 0, form = 'beta',
link = 'identity'){
thetaa <- prevalence.alternative
theta0 <- prevalence.null

if(!(alternative %in% c('less', 'greater'))){
stop('currently only supports one-sided tests. Valid options for alternative are less and greater')
}

if(alternative == 'less' & theta0 < thetaa){
stop('If alternative == "less", then prevalence.altnerative must be less than or equal to prevalence.null' )
}

if(alternative == 'greater' & theta0 > thetaa){
stop('If alternative == "greater", then prevalence.altnerative must be greater than or equal to prevalence.null' )
}

g <- switch(link,
logit = qlogis,
cloglog = cloglog,
log = log,
identity = function(x){x})

gdivinv <- switch(link,
logit = function(x){x * (1-x)},
cloglog = function(x){-log1p(-x) * (1-x)},
log = function(x){x},
identity = function(x){1})

unit_fia <- 1/(s*N) * gdivinv(thetaa)^2 /
solve(fi_pool_imperfect_cluster(s = s, N = N, p = thetaa,
sensitivity = sensitivity,
specificity = specificity,
correlation = correlation,
form = form))[1,1]
unit_fi0 <- 1/(s*N) * gdivinv(theta0)^2 /
solve(fi_pool_imperfect_cluster(s = s, N = N, p = theta0,
sensitivity = sensitivity,
specificity = specificity,
correlation = correlation,
form = form))[1,1]


unit_fia <- 1/(pool_size*pool_number) * gdivinv(thetaa)^2 /
solve(fi_pool_cluster(pool_size = pool_size,
pool_number = pool_number,
prevalence = thetaa,
correlation = correlation,
sensitivity = sensitivity,
specificity = specificity,
form = form))[1,1]
unit_fi0 <- 1/(pool_size*pool_number) * gdivinv(theta0)^2 /
solve(fi_pool_cluster(pool_size = pool_size,
pool_number = pool_number,
prevalence = theta0,
correlation = correlation,
sensitivity = sensitivity,
specificity = specificity,
form = form))[1,1]

# Note that the below is correct for either kind of one-sided test, but not for two sided tests
((qnorm(power)/sqrt(unit_fia) + qnorm(1 - sig.level)/sqrt(unit_fi0))/(g(theta0) - g(thetaa)))^2
}
Expand All @@ -111,41 +118,41 @@ optimise_s_prevalence_power <- function(prevalence.null, prevalence.alternative,
cost.unit, cost.pool, cost.location = 0,
sig.level = 0.05, power = 0.8,
alternative = 'less', link = 'logit',
correlation = 0, N = 1, form = 'beta',
correlation = 0, pool_number = 1, form = 'beta',
sensitivity = 1, specificity = 1,
max.s = 200, interval = 0){
ss <- function(s){
sample_size_prevalence(s, prevalence.null, prevalence.alternative,
max.pool_size = 200, interval = 0){
ss <- function(pool_size){
sample_size_prevalence(pool_size, prevalence.null, prevalence.alternative,
sig.level, power,
alternative, link,
sensitivity, specificity,
correlation = correlation,
N = N, form = form)
pool_number = pool_number, form = form)
}

cost <- function(s){
n <- ss(s)
out <- n * (cost.unit + cost.pool/s + cost.location/(s*N))
cost <- function(pool_size){
n <- ss(pool_size)
out <- n * (cost.unit + cost.pool/pool_size + cost.location/(pool_size*pool_number))
out
}

opt <- optimise(cost,c(1,max.s))

opt <- optimise(cost,c(1,max.pool_size))
cost_opt_ceiling <- cost(ceiling(opt$minimum))
cost_opt_floor <- cost(floor(opt$minimum))

if(cost_opt_ceiling < cost_opt_floor){
optimal_s <- ceiling(opt$minimum)
opt_cost <- cost_opt_ceiling
}else{
optimal_s <- floor(opt$minimum)
opt_cost <- cost_opt_floor
}

optimal_n <- ceiling(ss(optimal_s)/optimal_s) * optimal_s

if(optimal_s == max.s) warning('Maximum cost effectivness is achieved at or above the maximum size of pools allowed. Consider increasing max.s')

if(optimal_s == max.pool_size) warning('Maximum cost effectivness is achieved at or above the maximum size of pools allowed. Consider increasing max.pool_size')
if(interval == 0){
return(list(optimal_s = optimal_s, optimal_cost = opt_cost, optimal_n = optimal_n))
}else if(interval > 0){
Expand All @@ -155,13 +162,13 @@ optimise_s_prevalence_power <- function(prevalence.null, prevalence.alternative,
} else{
lower <- ceiling(uniroot(function(x){cost(x) - max_cost},c(1,optimal_s))$root)
}
if(cost(max.s) < max_cost){
upper <- max.s
warning('A pool size greater than max.s may fall within the specified range of cost effectiveness. Consider increasing max.s')
if(cost(max.pool_size) < max_cost){
upper <- max.pool_size
warning('A pool size greater than max.pool_size may fall within the specified range of cost effectiveness. Consider increasing max.pool_size')
}else{
upper <- floor(uniroot(function(x){cost(x) - max_cost},c(optimal_s,max.s))$root)
upper <- floor(uniroot(function(x){cost(x) - max_cost},c(optimal_s,max.pool_size))$root)
}

cost_interval <- c(cost(lower),cost(upper))
n_interval <- c(ss(lower), ss(upper))
out <- list(optimal_s = optimal_s,
Expand All @@ -182,18 +189,18 @@ power_fi(1000,10,0.01,0.005, specificity = 0.999, link = 'identity')

theta.null <- 0.01
theta.alt <- 0.005
s <- 10
N <- 2
pool_size <- 10
pool_number <- 2
rho <- 0.1

plot(function(x){binom::binom.power(x,alternative = 'less',n = 750,p = theta.null,method = 'asym')}, to = 0.01, n = 100000)
plot(function(x){binom::binom.power(x,alternative = 'less',n = 750,p = theta.null,method = 'logit')}, to = 0.01, n = 100000,add = TRUE)

de <- design_effect_cluster_fisher(s,N,theta.null,1,1,rho, form = 'beta')
de <- design_effect_cluster_fisher(pool_size,pool_number,theta.null,1,1,rho, form = 'beta')
de
ss.indi.arcsin <- pwr::pwr.p.test(h = pwr::ES.h(theta.alt,theta.null), power = 0.8, sig.level = 0.05, alternative = 'less')
ss.indi.arcsin
ss.pool.asympt <- sample_size_prevalence(s,N,theta.null,theta.alt,0.8,sig.level = 0.05,alternative = 'less',correlation = 0.1)
ss.pool.asympt <- sample_size_prevalence(pool_size,pool_number,theta.null,theta.alt,0.8,sig.level = 0.05,alternative = 'less',correlation = 0.1)
ss.pool.asympt
ss.indi.arcsin$n * de

Expand All @@ -218,8 +225,8 @@ plot(function(x){Vectorize(power_fi)(750,5,prevalence.null = 0.01, prevalence.al
plot(function(x){Vectorize(power_fi)(750,1,prevalence.null = 0.01, prevalence.alternative = x, link = 'identity', specificity = 1)}, to = 0.01, n = 100000, col = 'blue',add = TRUE)
plot(function(x){Vectorize(power_fi)(750,1,prevalence.null = 0.01, prevalence.alternative = x, link = 'logit', specificity = 1)}, to = 0.01, n = 100000, col = 'blue', add = TRUE)


plot(function(x){Vectorize(power_fi)(750,x,prevalence.null = 0.01, prevalence.alternative = 0.005, link = 'identity',specificity = 0.9)}, from = 1, to = 100, n = 100, ylim = c(0,1))

plot(function(x){Vectorize(power_fi)(750,x,prevalence.null = 0.01, prevalence.alternative = 0.005, link = 'logit',specificity = 0.9)}, from =1, to = 100, n = 100, add= TRUE)


Expand All @@ -235,5 +242,5 @@ plot(function(x){n <- Vectorize(sample_size_prevalence)(x,2,prevalence.null = 0.
plot(function(x){n <- Vectorize(sample_size_prevalence)(x,2,prevalence.null = 0.01, prevalence.alternative = 0.015, correlation = 0.1, link = 'logit', sensitivity = 1, specificity = 1, alternative = 'greater')}, from = 1, to = 40, n = 20, col = 'red', add = TRUE)


Vectorize(optimise_s_prevalence_power)(0.01,seq(0.001,0.009,0.001),1,1.5, specificity = 0.99,correlation = 0, N = 1,alternative = 'less', interval = 0.05, link= 'logit')
Vectorize(optimise_s_prevalence_power)(0.01,seq(0.001,0.009,0.001),1,1.5, specificity = 0.99,correlation = 0, pool_number = 1,alternative = 'less', interval = 0.05, link= 'logit')

2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ export(optimise_s_prevalence)
export(pois_catch)
export(pool_max_size)
export(pool_target_number)
export(power_pool)
export(sample_size_pool)
6 changes: 3 additions & 3 deletions R/fisher_information.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
#' designs.
#'
#' @param pool_size numeric The number of units per pool. Must be a numeric
#' value greater than or equal to 0.
#' @param pool_number numeric The number of pools per cluster. Must be a numeric
#' value greater than or equal to 0.
#' value or vector of values greater than 0.
#' @param pool_number numeric The number of pools per cluster. Must be a integer
#' value or a vector of integer values greater than or equal to 1.
#' @param catch_dist An object of class `distribution` (e.g. produced by
#' `nb_catch()`) defining the distribution of the possible catch. For
#' `fi_pool_random` catch is for the whole survey. For
Expand Down
Loading

0 comments on commit a0fce8a

Please sign in to comment.