From 75b441491024359402f17d593db55a10438790c7 Mon Sep 17 00:00:00 2001 From: rileyh Date: Wed, 11 Dec 2024 15:33:16 -0600 Subject: [PATCH] [#179] Unify variable and argument names - tp, tn, fp, fn are easy to type but look a little too similar to be easily readable. - true_positives, true_negatives, false_positives, false_negatives are really explicit but difficult to type. - true_pos, true_neg, false_pos, false_neg are the compromise used here: still explicit, but short enough to type, and they match the argument names f_measure() already uses. --- hlink/linking/core/model_metrics.py | 36 +++++++++++++++++--------- hlink/tests/core/model_metrics_test.py | 22 ++++++++-------- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/hlink/linking/core/model_metrics.py b/hlink/linking/core/model_metrics.py index 18af2dc..46533bb 100644 --- a/hlink/linking/core/model_metrics.py +++ b/hlink/linking/core/model_metrics.py @@ -11,34 +11,46 @@ def f_measure(true_pos: int, false_pos: int, false_neg: int) -> float: return 2 * true_pos / (2 * true_pos + false_pos + false_neg) -def mcc(tp: int, tn: int, fp: int, fn: int) -> float: +def mcc(true_pos: int, true_neg: int, false_pos: int, false_neg: int) -> float: """ - Given the counts of true positives (tp), true negatives (tn), false - positives (fp), and false negatives (fn) for a model run, compute the + Given the counts of true positives (true_pos), true negatives (true_neg), false + positives (false_pos), and false negatives (false_neg) for a model run, compute the Matthews Correlation Coefficient (MCC). 
""" - if (math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))) != 0: - mcc = ((tp * tn) - (fp * fn)) / ( - math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) + if ( + math.sqrt( + (true_pos + false_pos) + * (true_pos + false_neg) + * (true_neg + false_pos) + * (true_neg + false_neg) + ) + ) != 0: + mcc = ((true_pos * true_neg) - (false_pos * false_neg)) / ( + math.sqrt( + (true_pos + false_pos) + * (true_pos + false_neg) + * (true_neg + false_pos) + * (true_neg + false_neg) + ) ) else: mcc = 0 return mcc -def precision(tp: int, fp: int) -> float: - if (tp + fp) == 0: +def precision(true_pos: int, false_pos: int) -> float: + if (true_pos + false_pos) == 0: precision = np.nan else: - precision = tp / (tp + fp) + precision = true_pos / (true_pos + false_pos) return precision -def recall(tp: int, fn: int) -> float: - if (tp + fn) == 0: +def recall(true_pos: int, false_neg: int) -> float: + if (true_pos + false_neg) == 0: recall = np.nan else: - recall = tp / (tp + fn) + recall = true_pos / (true_pos + false_neg) return recall diff --git a/hlink/tests/core/model_metrics_test.py b/hlink/tests/core/model_metrics_test.py index 4fc56a3..2cb1d33 100644 --- a/hlink/tests/core/model_metrics_test.py +++ b/hlink/tests/core/model_metrics_test.py @@ -53,20 +53,20 @@ def test_f_measure_is_harmonic_mean_of_precision_and_recall( def test_mcc_example() -> None: - tp = 3112 - fp = 205 - fn = 1134 - tn = 33259 + true_pos = 3112 + false_pos = 205 + false_neg = 1134 + true_neg = 33259 - mcc_score = mcc(tp, tn, fp, fn) + mcc_score = mcc(true_pos, true_neg, false_pos, false_neg) assert abs(mcc_score - 0.8111208) < 0.0001, "expected MCC to be near 0.8111208" def test_precision_example() -> None: - tp = 3112 - fp = 205 + true_pos = 3112 + false_pos = 205 - precision_score = precision(tp, fp) + precision_score = precision(true_pos, false_pos) assert ( abs(precision_score - 0.9381972) < 0.0001 ), "expected precision to be near 0.9381972" @@ -93,10 +93,10 @@ def 
test_precision_no_positive_predictions() -> None: def test_recall_example() -> None: - tp = 3112 - fn = 1134 + true_pos = 3112 + false_neg = 1134 - recall_score = recall(tp, fn) + recall_score = recall(true_pos, false_neg) assert ( abs(recall_score - 0.7329251) < 0.0001 ), "expected recall to be near 0.7329251"