Update v1.1.1 (#29)

* Update v1.1.1 * Update NEWS * Add betas to COASTSS
insitro · Nov 22, 2024 · 24849b4 · 24849b4
1 parent 2ed2a8c
commit 24849b4
Show file tree

Hide file tree

Showing 25 changed files with 450 additions and 79 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -6,4 +6,7 @@ vignettes/.redun
 LICENSE.md
 README
 ^AllelicSeries\.Rproj$
+vignettes/coast_cache/*
+vignettes/coast_ss_cache/*
+
 
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,8 @@ src/*.o
 .venv
 __pycache__
 inst/doc
+vignettes/coast_cache/*
+vignettes/coast_ss_cache/*
+
+
+
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: AllelicSeries
 Title: Allelic Series Test
-Version: 0.1.1.0
+Version: 0.1.1.1
 Authors@R:
     c(person(given = "Zachary",
            family = "McCaw",
@@ -12,6 +12,11 @@ Authors@R:
            email = "[email protected]",
            role = "ctb",
            comment = c(ORCID = "0000-0002-1748-625X")),
+	person(given = "Thomas",
+           family = "Soare",
+           email = "[email protected]",
+           role = "ctb",
+           comment = c(ORCID = "0000-0002-4903-8646")),
 	person(given = "Jianhui",
 	   family = "Gao",
 	   email = "[email protected]",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,6 @@
+## Version 1.1.1
+* Updated `COAST` and `COASTSS` to return estimated effect sizes and standard errors from the allelic series burden test.
+
 ## Version 1.1.0
 * Added the ability to run `COAST` and `COASTSS`, as well as the component tests, with an arbitrary number of discrete annotation categories.
 	- The updated code prefers integer annotation category labels starting at 1. However, the main functions will still support 0-indexed category labels.

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -122,10 +122,13 @@ AnnoMat <- function(anno, n_anno = 3L) {
 #' @param ld (snps x snps) matrix of correlations among the genetic variants.
 #' @param se (snps x 1) vector of standard errors for the effect sizes.
 #' @param n_anno Number of annotation categories L.
-#' @return Numeric p-value.
+#' @param return_beta Return estimated effect sizes and standard errors?
+#'   Default: FALSE.
+#' @return If `return_beta`, a list containing the category effect sizes,
+#'   standard errors, and the p-value. Otherwise, the numeric p-value only.
 #' @export
-BaselineSS <- function(anno, beta, ld, se, n_anno = 3L) {
-    .Call(`_AllelicSeries_BaselineSS`, anno, beta, ld, se, n_anno)
+BaselineSS <- function(anno, beta, ld, se, n_anno = 3L, return_beta = FALSE) {
+    .Call(`_AllelicSeries_BaselineSS`, anno, beta, ld, se, n_anno, return_beta)
 }
 
 #' Allelic Sum Test from Sumstats
@@ -138,9 +141,12 @@ BaselineSS <- function(anno, beta, ld, se, n_anno = 3L) {
 #' @param se (snps x 1) vector of standard errors for the effect sizes.
 #' @param weights (L x 1) vector of annotation category weights. Note that the
 #'   number of annotation categories L is inferred from the length of `weights`.
-#' @return Numeric p-value.
+#' @param return_beta Return estimated effect sizes and standard errors?
+#'   Default: FALSE.
+#' @return If `return_beta`, a list containing the category effect sizes,
+#'   standard errors, and the p-value. Otherwise, the numeric p-value only.
 #' @export
-SumCountSS <- function(anno, beta, ld, se, weights) {
-    .Call(`_AllelicSeries_SumCountSS`, anno, beta, ld, se, weights)
+SumCountSS <- function(anno, beta, ld, se, weights, return_beta = FALSE) {
+    .Call(`_AllelicSeries_SumCountSS`, anno, beta, ld, se, weights, return_beta)
 }
 
diff --git a/R/allelic_series.R b/R/allelic_series.R
@@ -1,5 +1,5 @@
 # Purpose: Allelic series test.
-# Updated: 2024-11-11
+# Updated: 2024-11-20
 
 #' Aggregator
 #'
@@ -115,10 +115,13 @@ Aggregator <- function(
 #' @param method Method for aggregating across categories: ("none", "max",
 #'   "sum"). Default: "none".
 #' @param min_mac Minimum minor allele count for inclusion. Default: 0. 
+#' @param return_beta Return the estimated effect size? Default: FALSE.
 #' @param score_test Run a score test? If FALSE, performs a Wald test.
 #' @param weights (L x 1) vector of annotation category weights. Note that the
 #'   number of annotation categories L is inferred from the length of `weights`.
-#' @return Numeric p-value.
+#' @return If `return_beta = TRUE`, a list of including the effect size
+#'   data.frame "betas" and the p-value "pval". If `return_beta = FALSE`, 
+#'   a numeric p-value.
 #' @examples 
 #' # Generate data.
 #' data <- DGP(n = 1e3, snps = 1e2)
@@ -142,6 +145,7 @@ ASBT <- function(
   is_pheno_binary = FALSE,
   method = "none",
   min_mac = 0,
+  return_beta = FALSE,
   score_test = FALSE,
   weights = c(1, 2, 3)
 ) {
@@ -195,12 +199,23 @@ ASBT <- function(
 
   # Association test.
   if (is_pheno_binary) {
-    pval <- LogisticLRT(y = y, g = agg_geno, x = covar)
+    out <- LogisticLRT(
+      y = y, 
+      g = agg_geno, 
+      x = covar,
+      return_beta = return_beta
+    )
   } else {
-    pval <- LinearTest(y = y, g = agg_geno, x = covar, score_test = score_test)
+    out <- LinearTest(
+      y = y, 
+      g = agg_geno, 
+      x = covar, 
+      return_beta = return_beta,
+      score_test = score_test
+    )
   }
 
-  return(pval)
+  return(out)
 }
 
 
@@ -346,7 +361,6 @@ ASKAT <- function(
 
 # -----------------------------------------------------------------------------
 
-
 #' COding-variant Allelic Series Test
 #'
 #' Main allelic series test. Performs both Burden and SKAT type tests, then
@@ -377,7 +391,8 @@ ASKAT <- function(
 #'   Wald test.
 #' @param weights (L x 1) vector of annotation category weights. Note that the
 #'   number of annotation categories L is inferred from the length of `weights`.
-#' @return Numeric p-value.
+#' @return An object of class `COAST` with slots for effect sizes, variant 
+#'   counts, and p-values.
 #' @examples 
 #' # Generate data.
 #' data <- DGP(n = 1e3, snps = 1e2)
@@ -442,6 +457,7 @@ COAST <- function(
       apply_int = apply_int,
       min_mac = min_mac,
       is_pheno_binary = is_pheno_binary,
+      return_beta = TRUE,
       score_test = score_test,
       ...
     )
@@ -451,35 +467,37 @@ COAST <- function(
   # Burden tests.
   n_anno <- length(weights)
   unif_weights <- rep(1, n_anno)
-  p_base <- BurdenWrap(
+
+  burden_results <- list()
+  burden_results$base <- BurdenWrap(
     indicator = FALSE, method = "none", weights = unif_weights)
 
   # Case of no non-zero variant classes.
-  if (is.na(p_base)) {return(NA)}
+  if (is.na(burden_results$base$pval)) {return(NA)}
 
-  p_ind <- BurdenWrap(
+  burden_results$ind <- BurdenWrap(
     indicator = TRUE, method = "none", weights = unif_weights)
 
-  p_max_count <- BurdenWrap(
+  burden_results$max_count <- BurdenWrap(
     indicator = FALSE, method = "max", weights = weights)
 
-  p_max_ind <- BurdenWrap(
+  burden_results$max_ind <- BurdenWrap(
     indicator = TRUE, method = "max", weights = weights)
 
-  p_sum_count <- BurdenWrap(
+  burden_results$sum_count <- BurdenWrap(
     indicator = FALSE, method = "sum", weights = weights)
 
-  p_sum_ind <- BurdenWrap(
+  burden_results$sum_ind <- BurdenWrap(
     indicator = TRUE, method = "sum", weights = weights)
 
   # Collect p-values.
   p_burden <- c(
-    baseline = p_base,
-    ind = p_ind,
-    max_count = p_max_count,
-    max_ind = p_max_ind,
-    sum_count = p_sum_count,
-    sum_ind = p_sum_ind
+    baseline = burden_results$base$pval,
+    ind = burden_results$ind$pval,
+    max_count = burden_results$max_count$pval,
+    max_ind = burden_results$max_ind$pval,
+    sum_count = burden_results$sum_count$pval,
+    sum_ind = burden_results$sum_ind$pval
   )
 
   # SKAT tests.
@@ -555,9 +573,20 @@ COAST <- function(
     min_mac = min_mac
   )
 
+  # Format betas.
+  burden_labs <- names(burden_results)
+  betas <- lapply(burden_labs, function(lab) {
+    out <- burden_results[[lab]]$betas
+    out$test <- lab
+    out <- out[, c("test", "beta", "se")]
+    return(out)
+  })
+  betas <- do.call(rbind, betas)
+
   # Output.
   out <- methods::new(
     Class = "COAST",
+    Betas = betas,
     Counts = counts,
     Pvals = df_pvals
   )

diff --git a/R/allelic_series_sumstats.R b/R/allelic_series_sumstats.R
@@ -22,9 +22,12 @@
 #'   Although ideally provided, an identity matrix is assumed if not.
 #' @param method Method for aggregating across categories:
 #'   ("none", "sum"). Default: "none".
+#' @param return_beta Return the estimated effect size? Default: FALSE.
 #' @param weights (L x 1) vector of annotation category weights. Note that the
 #'   number of annotation categories L is inferred from the length of `weights`.
-#' @return Numeric p-value of the allelic series burden test.
+#' @return If `return_beta = TRUE`, a list of including the effect size
+#'   data.frame "betas" and the p-value "pval". If `return_beta = FALSE`, 
+#'   a numeric p-value.
 #' @examples
 #' # Generate data.
 #' data <- DGP(n = 1e3)
@@ -48,6 +51,7 @@ ASBTSS <- function(
   lambda = 1,
   ld = NULL,
   method = "none",
+  return_beta = FALSE,
   weights = c(1, 2, 3)
 ){
 
@@ -79,34 +83,52 @@ ASBTSS <- function(
   # Run burden test.
   if (method == "none") {
 
-    pval <- BaselineSS(
+    results <- BaselineSS(
       anno = anno,
       beta = beta,
       ld = ld,
       se = se,
-      n_anno = n_anno
+      n_anno = n_anno,
+      return_beta = TRUE
     )
 
   } else if (method == "sum") {
 
-    pval <- SumCountSS(
+    results <- SumCountSS(
       anno = anno,
       beta = beta,
       ld = ld,
       se = se,
-      weights = weights
+      weights = weights,
+      return_beta = TRUE
     )
 
   } else {
 
     stop("Select method from among 'none' or 'sum'.")
 
   }
-  
+
   # Apply genomic control.
   lambda <- max(1, lambda)
+  pval <- results$pval
   pval <- GenomicControl(lambda = lambda, pval = pval)
-  return(pval)
+
+  # Output.
+  if (return_beta) {
+    effects <- data.frame(
+      beta = results$beta,
+      se = results$se
+    )
+    out <- list(
+      betas = effects,
+      pval = pval
+    )
+  } else {
+    out <- pval
+  }
+
+  return(out)
 }
 
 
@@ -321,28 +343,34 @@ COASTSS <- function(
   # Baseline model p-value.
   n_anno <- length(weights)
   unif_weights <- rep(1, n_anno)
-  p_base <- ASBTSS(
+  burden_results <- list()
+
+  burden_results$base <- ASBTSS(
     anno = anno,
     beta = beta,
     se = se,
     check = FALSE,
     eps = eps,
     ld = ld,
     method = "none",
+    return_beta = TRUE,
     weights = unif_weights
   )
+  p_base <- burden_results$base$pval
 
   # Sum model p-value.
-  p_burden <- ASBTSS(
+  burden_results$sum <- ASBTSS(
     anno = anno,
     beta = beta,
     se = se,
     check = FALSE,
     eps = eps,
     ld = ld,
     method = "sum",
+    return_beta = TRUE,
     weights = weights
   )
+  p_burden <- burden_results$sum$pval
 
   # SKAT model p-value.
   p_skat <- tryCatch({
@@ -371,16 +399,27 @@ COASTSS <- function(
   key <- sapply(pvals, function(x) {!is.na(x)})
   p_omni <- RNOmni::OmniP(pvals[key], pval_weights[key])
 
-  # Output.
+  # P-values.
   df_pvals <- data.frame(
     test = c("baseline", "sum_count", "allelic_skat", "omni"),
     type = c("burden", "burden", "skat", "omni"),
     pval = c(pvals, p_omni)
   )
 
+  # Format betas.
+  burden_labs <- names(burden_results)
+  betas <- lapply(burden_labs, function(lab) {
+    out <- burden_results[[lab]]$betas
+    out$test <- lab
+    out <- out[, c("test", "beta", "se")]
+    return(out)
+  })
+  betas <- do.call(rbind, betas)
+
   # Output.
   out <- methods::new(
     Class = "COAST",
+    Betas = betas,
     Counts = NULL,
     Pvals = df_pvals
   )