-
Notifications
You must be signed in to change notification settings - Fork 23
/
utils.py
93 lines (67 loc) · 2.54 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""Utility function for INVASE.
Reference: Jinsung Yoon, James Jordon, Mihaela van der Schaar,
"IINVASE: Instance-wise Variable Selection using Neural Networks,"
International Conference on Learning Representations (ICLR), 2019.
Paper Link: https://openreview.net/forum?id=BJg_roAcK7
Contact: [email protected]
---------------------------------------------------
(1) Feature performance metrics
(2) Prediction performance metrics
(3) Bernoulli sampling
"""
# Necessary packages
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score
def feature_performance_metric (ground_truth, importance_score):
    """Performance metrics for feature importance (TPR and FDR).

    Vectorized over samples: replaces the per-sample Python loop with
    axis-wise NumPy sums, which is equivalent arithmetic at C speed.

    Args:
        - ground_truth: binary ground truth feature importance, shape (n, d)
        - importance_score: binary computed importance indicators, shape (n, d)

    Returns:
        - mean_tpr: mean value of true positive rate (percent)
        - std_tpr: standard deviation of true positive rate
        - mean_fdr: mean value of false discovery rate (percent)
        - std_fdr: standard deviation of false discovery rate
    """
    ground_truth = np.asarray(ground_truth, dtype=float)
    importance_score = np.asarray(importance_score, dtype=float)

    # True positive rate per sample: (selected AND relevant) / relevant.
    # 1e-8 guards against division by zero, as in the original.
    tpr_nom = np.sum(importance_score * ground_truth, axis=1)
    tpr_den = np.sum(ground_truth, axis=1)
    tpr = 100.0 * tpr_nom / (tpr_den + 1e-8)

    # False discovery rate per sample: (selected AND irrelevant) / selected.
    fdr_nom = np.sum(importance_score * (1.0 - ground_truth), axis=1)
    fdr_den = np.sum(importance_score, axis=1)
    fdr = 100.0 * fdr_nom / (fdr_den + 1e-8)

    return np.mean(tpr), np.std(tpr), np.mean(fdr), np.std(fdr)
def prediction_performance_metric (y_test, y_hat):
    """Performance metrics for prediction (AUC, APR, Accuracy).

    Args:
        - y_test: testing set labels, one-hot, shape (n, 2)
        - y_hat: predicted class probabilities on the testing set, shape (n, 2)

    Returns:
        - auc: area under the ROC curve
        - apr: average precision score
        - acc: accuracy at a 0.5 decision threshold
    """
    # Both label matrix and prediction matrix are one-hot / two-column;
    # column 1 carries the positive-class label and score.
    labels = y_test[:, 1]
    scores = y_hat[:, 1]
    hard_predictions = 1. * (scores > 0.5)

    auc = roc_auc_score(labels, scores)
    apr = average_precision_score(labels, scores)
    acc = accuracy_score(labels, hard_predictions)
    return auc, apr, acc
def bernoulli_sampling (prob):
    """Draw Bernoulli samples with the given per-entry success probabilities.

    Args:
        - prob: array of P(Y = 1) values, shape (n, d)

    Returns:
        - samples: binary samples from the Bernoulli distribution, shape (n, d)
    """
    num_rows, num_cols = prob.shape
    # One trial per entry; prob broadcasts element-wise over the output shape.
    return np.random.binomial(1, p=prob, size=(num_rows, num_cols))