-
Notifications
You must be signed in to change notification settings - Fork 66
/
Copy pathmad_outlier_detection.py
98 lines (68 loc) · 2.63 KB
/
mad_outlier_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2018-11-28 16:27:19
# @Author : Bolun Wang ([email protected])
# @Link : http://cs.ucsb.edu/~bolunwang
import os
import sys
import time
import numpy as np
from keras.preprocessing import image
##############################
#        PARAMETERS          #
##############################

# Directory for storing results.
RESULT_DIR = 'results'
# Image filename template for visualization results; formatted with a
# string tag (e.g. 'mask') and an integer label.
IMG_FILENAME_TEMPLATE = 'gtsrb_visualize_%s_label_%d.png'

# Input size: rows, columns and color channels.
IMG_ROWS, IMG_COLS, IMG_COLOR = 32, 32, 3
INPUT_SHAPE = (IMG_ROWS, IMG_COLS, IMG_COLOR)

# Total number of classes in the model.
NUM_CLASSES = 43

##############################
#      END PARAMETERS        #
##############################
def outlier_detection(l1_norm_list, idx_mapping):
    """Flag labels whose L1 norm is abnormally small, using the MAD rule.

    The median absolute deviation (MAD) of the norms is scaled by 1.4826
    (the consistency constant for normally distributed data). A label is
    flagged when its norm is not above the median and lies more than
    2 scaled MADs away from it. The median, the MAD, the anomaly index
    (deviation of the smallest norm in MAD units) and the flagged labels
    are printed.

    Args:
        l1_norm_list: sequence of L1 norms, one per analyzed label.
        idx_mapping: dict mapping a label to the position of its norm in
            ``l1_norm_list``.

    Returns:
        List of ``(label, l1_norm)`` tuples for the flagged labels, sorted
        by ascending norm. Empty when nothing is flagged or when
        ``l1_norm_list`` is empty.
    """
    consistency_constant = 1.4826  # if normal distribution
    anomaly_threshold = 2  # deviation (in MAD units) above which to flag

    norms = np.asarray(l1_norm_list, dtype=float)
    if norms.size == 0:
        # np.min() raises on an empty array; there is nothing to analyze.
        print('no L1 norms given, skipping outlier detection')
        return []

    median = np.median(norms)
    abs_dev = np.abs(norms - median)
    mad = consistency_constant * np.median(abs_dev)

    if mad > 0:
        scores = abs_dev / mad
    else:
        # Degenerate case: more than half of the norms equal the median.
        # Any non-zero deviation is infinitely many MADs away; a zero
        # deviation is not an anomaly. This avoids a division by zero.
        scores = np.where(abs_dev > 0, np.inf, 0.0)

    min_mad = scores[np.argmin(norms)]

    print('median: %f, MAD: %f' % (median, mad))
    print('anomaly index: %f' % min_mad)

    flag_list = []
    for y_label, idx in idx_mapping.items():
        # Only abnormally *small* norms are of interest.
        if norms[idx] > median:
            continue
        if scores[idx] > anomaly_threshold:
            flag_list.append((y_label, l1_norm_list[idx]))

    flag_list.sort(key=lambda item: item[1])

    if flag_list:
        print('flagged label list: %s' %
              ', '.join(['%d: %2f' % (y_label, l_norm)
                         for y_label, l_norm in flag_list]))

    return flag_list
def analyze_pattern_norm_dist():
    """Load the saved mask images and run outlier detection on their L1 norms.

    For every label in ``range(NUM_CLASSES)`` the mask image named by
    ``IMG_FILENAME_TEMPLATE`` is looked up in ``RESULT_DIR``; labels with
    no such file are skipped. Each mask found is loaded as grayscale,
    divided by 255, flattened, and its L1 norm is computed. The norms and
    the label-to-index mapping are then passed to ``outlier_detection``.
    If no mask file exists, the function reports 0 labels and returns
    without running the detection.
    """
    mask_flatten = []
    idx_mapping = {}

    for y_label in range(NUM_CLASSES):
        mask_path = os.path.join(
            RESULT_DIR, IMG_FILENAME_TEMPLATE % ('mask', y_label))
        if not os.path.isfile(mask_path):
            continue

        img = image.load_img(
            mask_path,
            color_mode='grayscale',
            # target_size is (height, width); drop the channel entry.
            target_size=INPUT_SHAPE[:2])
        mask = image.img_to_array(img)
        mask /= 255
        mask = mask[:, :, 0]

        # Remember where this label's mask sits in the list.
        idx_mapping[y_label] = len(mask_flatten)
        mask_flatten.append(mask.flatten())

    l1_norm_list = [np.sum(np.abs(m)) for m in mask_flatten]

    print('%d labels found' % len(l1_norm_list))

    if not l1_norm_list:
        # Without any mask there is no distribution to analyze.
        return

    outlier_detection(l1_norm_list, idx_mapping)
if __name__ == '__main__':
    # Announce the script, run the analysis, then report wall-clock time.
    print('%s start' % sys.argv[0])
    began_at = time.time()
    analyze_pattern_norm_dist()
    print('elapsed time %.2f s' % (time.time() - began_at))