feat: add dx_npv_prevalence and dx_ppv_prevalence
overdodactyl committed Oct 27, 2024
1 parent de7664b commit bfa9174
Showing 14 changed files with 307 additions and 34 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
@@ -1,3 +1,5 @@
^renv$
^renv\.lock$
^diagnosticSummary\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
3 changes: 3 additions & 0 deletions .gitignore
@@ -9,3 +9,6 @@ docs/
inst/WORDLIST
docs
test_cases
renv/
.Rprofile
renv.lock
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -34,6 +34,7 @@ export(dx_mcc)
export(dx_mcnemars)
export(dx_nir)
export(dx_npv)
export(dx_npv_prevalence)
export(dx_odds_ratio)
export(dx_plot_calibration)
export(dx_plot_cap)
@@ -52,6 +53,7 @@ export(dx_plot_rocs)
export(dx_plot_thresholds)
export(dx_plot_youden_j)
export(dx_ppv)
export(dx_ppv_prevalence)
export(dx_prevalence)
export(dx_sensitivity)
export(dx_specificity)
5 changes: 4 additions & 1 deletion R/dx_constructor.R
@@ -15,6 +15,8 @@
#' @param outcome_label Label for outcome (string)
#' @param grouping_variables Character vector of variable names to
#' be summarized by. Variables are converted to factors if not already one.
#' @param prevalence Numeric value between 0 and 1, representing a target
#' prevalence for additional NPV and PPV calculations.
#' @param citype Confidence interval type.
#' @param bootreps Number of bootstrap samples used to generate F1 score CI
#' @param bootseed Seed value to be used when calculating bootstrapped CIs
@@ -27,7 +29,7 @@
dx <- function(data,
classlabels = c("Negative", "Positive"),
threshold_range = NA, outcome_label = NA, pred_varname, true_varname,
setthreshold = .5, poslabel = 1, grouping_variables = NA,
setthreshold = .5, poslabel = 1, grouping_variables = NA, prevalence = NA,
citype = "exact", bootreps = 2000, bootseed = 20191015,
doboot = FALSE, direction = "auto", ...) {

@@ -84,6 +86,7 @@ dx <- function(data,
setthreshold = setthreshold,
poslabel = poslabel,
grouping_variables = grouping_variables,
prevalence = prevalence,
citype = citype,
bootreps = bootreps,
bootseed = bootseed,
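A minimal sketch of how the new argument is meant to be used, assuming the column names of the bundled `dx_heart_failure` data; every value other than `prevalence` is an illustrative choice, not a prescribed default:

``` r
library(diagnosticSummary)

data("dx_heart_failure")

# Sketch only: `prevalence` is the new argument; the remaining values are
# illustrative settings for the sample data.
dx_obj <- dx(
  data = dx_heart_failure,
  true_varname = "truth",
  pred_varname = "predicted",
  outcome_label = "Heart Failure",   # illustrative label
  setthreshold = 0.5,
  prevalence = 0.10                  # also report NPV/PPV at a 10% target prevalence
)

summary(dx_obj, variable = "Overall")
```

When `prevalence` is left at its default of `NA`, the two extra rows are simply not added (see the `dx_measure()` change below).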
6 changes: 6 additions & 0 deletions R/dx_measure.R
@@ -197,6 +197,12 @@ dx_measure <- function(data, threshold, options, var = "Overall",
# Apply the generic function to all metrics and combine results
metrics_list <- lapply(metric_calculations, call_metric)

if (!is.na(options$prevalence)) {
metrics_list$npv_prevalence <- dx_npv_prevalence(cm, prevalence = options$prevalence)
metrics_list$ppv_prevalence <- dx_ppv_prevalence(cm, prevalence = options$prevalence)
}


# Combine all metric results into one data frame
results <- do.call(rbind, metrics_list)

118 changes: 118 additions & 0 deletions R/dx_metrics.R
@@ -136,6 +136,124 @@ dx_npv <- function(cm, detail = "full", ...) {
metric_binomial(cm$tn, cm$testneg, name = "Negative Predictive Value", detail = detail, ...)
}

metric_npv_ppv_prevalence <- function(est, logit, varlogit, ...) {
cc <- stats::qnorm(1 - (1 - 0.95) / 2)
lci_logit <- logit - cc * sqrt(varlogit)
uci_logit <- logit + cc * sqrt(varlogit)

lower <- exp(lci_logit) / ( 1 + exp(lci_logit))
upper <- exp(uci_logit) / ( 1 + exp(uci_logit))

formatted_estimate <- conf_int(
est,
lower,
upper,
percent = TRUE
)

measure_df(
estimate = formatted_estimate,
estimate_raw = est,
lci_raw = lower,
uci_raw = upper,
ci_type = "Simple Logit (Mercaldo et al, 2007)",
...
)
}

#' Calculate Negative Predictive Value (NPV) at Target Prevalence
#'
#' Computes the Negative Predictive Value (NPV) adjusted to a specified prevalence
#' level. This function is useful for understanding classifier performance in settings
#' where the actual prevalence of the condition may differ from that in the initial data.
#'
#' @inherit metrics-params
#'
#' @param prevalence Numeric value between 0 and 1, representing the target prevalence
#' for adjusting the NPV calculation.
#'
#' @details
#' This function calculates the NPV using the formula:
#' \deqn{NPV = \frac{Specificity \times (1 - Prevalence)}{(1 - Sensitivity) \times Prevalence + Specificity \times (1 - Prevalence)}}
#' where `Specificity` is the true negative rate and `Sensitivity` is the true positive rate.
#' Adjusting NPV for prevalence allows better estimation of the classifier’s performance
#' in different population settings.
#'
#' Confidence intervals are calculated using a simple logit transformation (Mercaldo et al., 2007).
#'
#' @examples
#' cm <- dx_cm(dx_heart_failure$predicted, dx_heart_failure$truth,
#' threshold = 0.5, poslabel = 1
#' )
#' # Calculate NPV at a prevalence of 0.1
#' dx_npv_prevalence(cm, prevalence = 0.1)
#'
#' @seealso [dx_cm()], [dx_npv()] for the non-prevalence adjusted NPV.
#' @export
#' @concept metrics
dx_npv_prevalence <- function(cm, prevalence, detail = "full", ...) {
spec <- dx_specificity(cm, detail = "simple")
sens <- dx_sensitivity(cm, detail = "simple")
npv <- (spec * (1 - prevalence)) / ((1 - sens) * prevalence + spec * (1 - prevalence))
if (detail == "simple") return(npv)
logit <- log((spec * (1 - prevalence)) / ((1 - sens) * prevalence))
varlogit <- (sens / (1 - sens)) * 1 / cm$dispos + ((1 - spec) / spec ) * 1 / cm$disneg
metric_npv_ppv_prevalence(
est = npv,
logit,
varlogit,
measure = "Negative Predictive Value (Target Prevalence)",
notes = paste0("Prevalence value: ", prevalence)
)
}
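As a quick sanity check on the formula above, the prevalence-adjusted NPV can be computed by hand in base R. The numbers below are illustrative, not taken from the package data:

``` r
# Illustrative inputs: sensitivity, specificity, and a target prevalence.
sens <- 0.85
spec <- 0.76
prev <- 0.10

# NPV formula from the @details block above.
npv_at_prev <- (spec * (1 - prev)) /
  ((1 - sens) * prev + spec * (1 - prev))
npv_at_prev   # ~0.979
```

At a 10% target prevalence a negative result is highly reassuring; the same sensitivity and specificity at 50% prevalence would give an NPV of only about 0.84.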

#' Calculate Positive Predictive Value (PPV) at Target Prevalence
#'
#' Computes the Positive Predictive Value (PPV) adjusted to a specified prevalence
#' level. This function is useful for understanding classifier performance in settings
#' where the actual prevalence of the condition may differ from that in the initial data.
#'
#' @inherit metrics-params
#'
#' @param prevalence Numeric value between 0 and 1, representing the target prevalence
#' for adjusting the PPV calculation.
#' @details
#' This function calculates the PPV using the formula:
#' \deqn{PPV = \frac{Sensitivity \times Prevalence}{(Sensitivity \times Prevalence) + (1 - Specificity) \times (1 - Prevalence)}}
#' where `Sensitivity` is the true positive rate and `Specificity` is the true negative rate.
#' Adjusting PPV for prevalence allows a more accurate assessment of the classifier’s performance
#' in different population settings.
#'
#' Confidence intervals are calculated using a simple logit transformation (Mercaldo et al., 2007).
#'
#' @examples
#' cm <- dx_cm(dx_heart_failure$predicted, dx_heart_failure$truth,
#' threshold = 0.5, poslabel = 1
#' )
#' # Calculate PPV at a prevalence of 0.1
#' dx_ppv_prevalence(cm, prevalence = 0.1)
#'
#' @seealso [dx_cm()], [dx_ppv()] for the non-prevalence adjusted PPV.
#' @export
#' @concept metrics
dx_ppv_prevalence <- function(cm, prevalence, detail = "full", ...) {
spec <- dx_specificity(cm, detail = "simple")
sens <- dx_sensitivity(cm, detail = "simple")
ppv <- (sens * prevalence) / ((sens * prevalence) + (1-spec) * (1 - prevalence))

if (detail == "simple") return(ppv)

logit <- log((sens * prevalence) / ((1 - spec) * (1 - prevalence)))
varlogit <- ((1 - sens) / sens ) * 1 / cm$dispos + (spec / (1 - spec) ) * 1 / cm$disneg
metric_npv_ppv_prevalence(
est = ppv,
logit,
varlogit,
measure = "Positive Predictive Value (Target Prevalence)",
notes = paste0("Prevalence value: ", prevalence)
)
}
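The PPV version can be checked the same way, including the simple logit interval (Mercaldo et al., 2007) that `metric_npv_ppv_prevalence()` back-transforms. A sketch with illustrative inputs; `n_pos` and `n_neg` stand in for the disease-positive and disease-negative counts the package reads from `cm$dispos` and `cm$disneg`:

``` r
# Illustrative inputs only.
sens  <- 0.85
spec  <- 0.76
prev  <- 0.10
n_pos <- 100   # assumed disease-positive count (cm$dispos)
n_neg <- 300   # assumed disease-negative count (cm$disneg)

# PPV at the target prevalence (formula from the @details block above).
ppv_at_prev <- (sens * prev) /
  (sens * prev + (1 - spec) * (1 - prev))

# Simple logit interval: normal CI on the logit scale, then back-transform.
logit    <- log((sens * prev) / ((1 - spec) * (1 - prev)))
varlogit <- ((1 - sens) / sens) / n_pos + (spec / (1 - spec)) / n_neg
z  <- stats::qnorm(0.975)
ci <- stats::plogis(logit + c(-1, 1) * z * sqrt(varlogit))

ppv_at_prev   # ~0.282
ci            # roughly c(0.240, 0.328)
```

`stats::plogis()` is just `exp(x) / (1 + exp(x))`, the same back-transform written out in the helper above.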

#' Calculate False Negative Rate (FNR)
#'
#' Calculates the False Negative Rate (FNR), which is the proportion of actual positives
2 changes: 1 addition & 1 deletion README.Rmd
@@ -13,7 +13,7 @@ knitr::opts_chunk$set(
)
```

# diagnosticSummary <img src="man/figures/logo.png" align="right" />
# diagnosticSummary <img src="man/figures/logo.png" align="right" width="100" height="100" />

<!-- badges: start -->
[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental)
36 changes: 19 additions & 17 deletions README.md
@@ -1,7 +1,7 @@

<!-- README.md is generated from README.Rmd. Please edit that file -->

# diagnosticSummary <img src="man/figures/logo.png" align="right" />
# diagnosticSummary <img src="man/figures/logo.png" align="right" width="100" height="100" />

<!-- badges: start -->

@@ -30,13 +30,16 @@ library(diagnosticSummary)
# Load sample data
data("dx_heart_failure")
head(dx_heart_failure)
#> AgeGroup Sex truth predicted AgeSex
#> 1 (20,50] Male 0 0.016164112 (20,50] - Male
#> 2 (20,50] Male 0 0.074193671 (20,50] - Male
#> 3 (20,50] Female 0 0.004677979 (20,50] - Female
#> 4 (20,50] Female 0 0.017567313 (20,50] - Female
#> 5 (20,50] Female 0 0.017517025 (20,50] - Female
#> 6 (20,50] Male 0 0.051570734 (20,50] - Male
#> AgeGroup Sex truth predicted AgeSex predicted_rf
#> 1 (20,50] Male 0 0.016164112 (20,50] - Male 0.19774011
#> 2 (20,50] Male 0 0.074193671 (20,50] - Male 0.04624277
#> 3 (20,50] Female 0 0.004677979 (20,50] - Female 0.22448980
#> 4 (20,50] Female 0 0.017567313 (20,50] - Female 0.09326425
#> 5 (20,50] Female 0 0.017517025 (20,50] - Female 0.04878049
#> 6 (20,50] Male 0 0.051570734 (20,50] - Male 0.10982659
```

``` r

# Create dx object
dx_obj <- dx(
@@ -67,23 +70,22 @@ summary(dx_obj, variable = "Overall", show_var = F, show_label = F)
| LRT+ | 3.54 (2.66, 4.71) |
| LRT- | 0.20 (0.13, 0.32) |
| Odds Ratio | 17.59 (9.12, 33.94) |
| F1 Score | 75.5% (68.3%, 81.5%) |
| F2 Score | 80.7% (74.0%, 86.4%) |
| F1 Score | 75.5% (68.6%, 81.2%) |
| F2 Score | 80.7% (73.9%, 86.3%) |
| Prevalence | 37.5% (31.7%, 43.7%) |
| False Negative Rate | 15.3% (8.8%, 24.0%) |
| False Positive Rate | 23.9% (17.6%, 31.2%) |
| False Discovery Rate | 32.0% (23.8%, 41.0%) |
| AUC PR | 0.87 |
| Cohen’s Kappa | 0.58 (0.48, 0.68) |
| Matthews Correlation Coefficient | 59.0% (48.9%, 68.4%) |
| Balanced Accuracy | 80.4% (75.3%, 85.1%) |
| Informedness | 60.8% (50.9%, 70.7%) |
| Markedness | 57.2% (47.4%, 67.2%) |
| G-mean | 80.3% (75.1%, 84.8%) |
| Fowlkes-Mallows Index | 75.9% (69.6%, 81.4%) |
| Matthews Correlation Coefficient | 59.0% (49.6%, 68.1%) |
| Balanced Accuracy | 80.4% (75.5%, 85.5%) |
| Informedness | 60.8% (51.2%, 69.9%) |
| Markedness | 57.2% (47.2%, 66.7%) |
| G-mean | 80.3% (75.5%, 85.0%) |
| Fowlkes-Mallows Index | 75.9% (69.7%, 81.7%) |
| Brier Score | 0.11 |
| Pearson’s Chi-squared | p\<0.01 |
| Pearson’s Chi-squared | p\<0.01 |
| Fisher’s Exact | p\<0.01 |
| G-Test | p\<0.01 |

4 changes: 4 additions & 0 deletions man/dx.Rd


53 changes: 53 additions & 0 deletions man/dx_npv_prevalence.Rd


53 changes: 53 additions & 0 deletions man/dx_ppv_prevalence.Rd


Binary file added man/figures/logo.png
