-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_a_feature_extractor.py
132 lines (75 loc) · 3.1 KB
/
run_a_feature_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
__author__ = 'Stefano Mauceri'
__email__ = '[email protected]'
"""
In this file I show you how to apply a given
feature-extractor to your data.
The resulting feature-based representation can be used
for classification. If you want to use multiple
features, apply each feature-extractor separately and
concatenate the resulting features.
"""
# =============================================================================
# IMPORT
# =============================================================================
import os
import numpy as np
from src.fitness.math_functions import *
from sklearn.metrics import roc_curve, auc
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
# =============================================================================
# FUNCTIONS
# =============================================================================
class KNN(NearestNeighbors):
    """One-class nearest-neighbour scorer.

    Wraps sklearn's NearestNeighbors so that `score_samples` returns an
    anomaly-style score: the negated mean distance from each test sample to
    its k nearest training samples (higher = more similar to training data).
    """

    def __init__(self, k=1, metric_params=None):
        # FIX: the default used to be a shared mutable dict (`{}`); `None` is
        # sklearn's own default and is equivalent for callers passing nothing.
        # The dead class attributes `k` / `metric_params` (never read) were
        # removed — sklearn stores constructor params on the instance.
        super().__init__(n_neighbors=k, metric_params=metric_params)

    def score_samples(self, X_test):
        """Return -mean(distance to the k nearest training samples) per row.

        :param X_test: 2-D array of feature vectors to score.
        :return: 1-D array, one score per row of X_test.
        """
        try:
            neighbors = self.kneighbors(X_test)[0]
        except Exception:
            # FIX: was a bare `except:` (also caught KeyboardInterrupt /
            # SystemExit). Some metrics are unsupported by the tree-based
            # indexes, so fall back to brute force and retry once.
            self.algorithm = 'brute'
            self.fit(self._fit_X)
            neighbors = self.kneighbors(X_test)[0]
        return np.mean(neighbors, axis=1) * -1
def extract_features(X, phenotype):
    # Evaluate a feature-extractor expression against the data.
    # `phenotype` is a code string that references the local name `T`
    # (and functions wildcard-imported from src.fitness.math_functions),
    # so `T` must be bound to X before evaluation — do not rename it.
    # SECURITY NOTE(review): eval() executes arbitrary code; only pass
    # phenotypes from a trusted source (e.g. ones produced by this project).
    T = X
    return eval(phenotype)
def scale_features(X_train, X_test):
    """Standardise both splits using statistics fitted on the training split only."""
    normaliser = StandardScaler().fit(X_train)
    return normaliser.transform(X_train), normaliser.transform(X_test)
def get_auroc(Y_true, Test_scores):
    """Area under the ROC curve for the given scores, with class 1 as positive."""
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(
        Y_true, Test_scores, pos_label=1)
    return auc(false_pos_rate, true_pos_rate)
# =============================================================================
# MAIN
# =============================================================================
# SET THE DATASET NAME, THE POSITIVE CLASS LABEL, THE FEATURE EXTRACTOR.
dataset = 'SyntheticControl'
class_ = 1
# Evolved feature-extractor expression; it is eval()'d inside
# extract_features with `T` bound to the data array, and relies on the
# functions wildcard-imported from src.fitness.math_functions.
feature_extractor = 'extract(T, 58, None, True, function = lambda T:ARCoeff(T))'
# LOAD DATA.
# Expects ./data/<dataset>/<dataset>_{X,Y}_{TRAIN,TEST}.npy under the CWD.
cwd = os.getcwd()
X_train = np.load(os.path.join(cwd, 'data', f'{dataset}', f'{dataset}_X_TRAIN.npy'))
Y_train = np.load(os.path.join(cwd, 'data', f'{dataset}', f'{dataset}_Y_TRAIN.npy'))
X_test = np.load(os.path.join(cwd, 'data', f'{dataset}', f'{dataset}_X_TEST.npy'))
Y_test = np.load(os.path.join(cwd, 'data', f'{dataset}', f'{dataset}_Y_TEST.npy'))
# ADAPT DATA TO ONE-CLASS CLASSIFICATION.
# Train only on the positive class; test labels become 1 (positive) / 0 (rest).
X_train = X_train[Y_train == class_]
Y_test = (Y_test == class_).astype(int)
# EXTRACT FEATURES.
X_train = extract_features(X_train, feature_extractor)
X_test = extract_features(X_test, feature_extractor)
# STANDARDISE FEATURES.
# Scaler statistics are fitted on the training split only, then applied to both.
X_train, X_test = scale_features(X_train, X_test)
# CLASSIFY.
# 1-NN one-class classifier: scores are negated mean distances to training data.
classifier = KNN()
classifier.fit(X_train)
scores = classifier.score_samples(X_test)
# Evaluate with AUROC (reported as a percentage).
auroc = get_auroc(Y_test, scores) * 100
print(f'THE AUROC IS: {round(auroc, 1)}%')
# =============================================================================
# END
# =============================================================================