-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathutils_plot.py
142 lines (112 loc) · 5.73 KB
/
utils_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import numpy as np
from scipy.stats import ttest_ind
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
# helper function to output plot and write summary data
def plot_results(results, random_counterpart=None, random_concepts=None, num_random_exp=100,
min_p_val=0.05):
"""Helper function to organize results.
When run in a notebook, outputs a matplotlib bar plot of the
TCAV scores for all bottlenecks for each concept, replacing the
bars with asterisks when the TCAV score is not statistically significant.
If you ran TCAV with a random_counterpart, supply it here, otherwise supply random_concepts.
If you get unexpected output, make sure you are using the correct keywords.
Args:
results: dictionary of results from TCAV runs.
random_counterpart: name of the random_counterpart used, if it was used.
random_concepts: list of random experiments that were run.
num_random_exp: number of random experiments that were run.
min_p_val: minimum p value for statistical significance
"""
# helper function, returns if this is a random concept
def is_random_concept(concept):
if random_counterpart:
return random_counterpart == concept
elif random_concepts:
return concept in random_concepts
else:
return 'random500_' in concept
# print class, it will be the same for all
print("Class =", results[0]['target_class'])
# prepare data
# dict with keys of concepts containing dict with bottlenecks
result_summary = {}
# random
random_i_ups = {}
for result in results:
if result['cav_concept'] not in result_summary:
result_summary[result['cav_concept']] = {}
if result['bottleneck'] not in result_summary[result['cav_concept']]:
result_summary[result['cav_concept']][result['bottleneck']] = []
result_summary[result['cav_concept']][result['bottleneck']].append(result)
# store random
if is_random_concept(result['cav_concept']):
if result['bottleneck'] not in random_i_ups:
random_i_ups[result['bottleneck']] = []
random_i_ups[result['bottleneck']].append(result['i_up'])
# to plot, must massage data again
plot_data = {}
# print concepts and classes with indentation
for concept in result_summary:
# if not random
if not is_random_concept(concept):
print(" ", "Concept =", concept)
for bottleneck in result_summary[concept]:
i_ups = [item['i_up'] for item in result_summary[concept][bottleneck]]
# Calculate statistical significance
_, p_val = ttest_ind(random_i_ups[bottleneck], i_ups)
if bottleneck not in plot_data:
plot_data[bottleneck] = {'bn_vals': [], 'bn_stds': [], 'significant': []}
if p_val > min_p_val:
# statistically insignificant
plot_data[bottleneck]['bn_vals'].append(0.01)
plot_data[bottleneck]['bn_stds'].append(0)
plot_data[bottleneck]['significant'].append(False)
else:
plot_data[bottleneck]['bn_vals'].append(np.mean(i_ups))
plot_data[bottleneck]['bn_stds'].append(np.std(i_ups))
# plot_data[bottleneck]['significant'].append(p_val <= min_p_val)
plot_data[bottleneck]['significant'].append(True)
print(3 * " ", "Bottleneck =", ("%s. TCAV Score = %.2f (+- %.2f), "
"random was %.2f (+- %.2f). p-val = %.3f (%s)") % (
bottleneck, np.mean(i_ups), np.std(i_ups),
np.mean(random_i_ups[bottleneck]),
np.std(random_i_ups[bottleneck]), p_val,
"not significant" if p_val > min_p_val else "significant"))
# subtract number of random experiments
if random_counterpart:
num_concepts = len(result_summary) - 1
elif random_concepts:
num_concepts = len(result_summary) - len(random_concepts)
else:
num_concepts = len(result_summary) - num_random_exp
num_bottlenecks = len(plot_data)
bar_width = 0.35
# create location for each bar. scale by an appropriate factor to ensure
# the final plot doesn't have any parts overlapping
index = np.arange(num_concepts) * bar_width * (num_bottlenecks + 1)
# matplotlib
fig, ax = plt.subplots()
# draw all bottlenecks individually
for i, [bn, vals] in enumerate(plot_data.items()):
bar = ax.bar(index + i * bar_width, vals['bn_vals'],
bar_width, yerr=vals['bn_stds'], label=bn)
# draw stars to mark bars that are stastically insignificant to
# show them as different from others
for j, significant in enumerate(vals['significant']):
if not significant:
ax.text(index[j] + i * bar_width - 0.1, 0.01, "*",
fontdict={'weight': 'bold', 'size': 16,
'color': bar.patches[0].get_facecolor()})
# set properties
ax.set_title('TCAV Scores for each concept and bottleneck')
ax.set_ylabel('TCAV Score')
ax.set_ylim(0., 1.1)
ax.set_xticks(index + num_bottlenecks * bar_width / 2)
ax.set_xticklabels([concept for concept in result_summary if not is_random_concept(concept)])
handles, labels = ax.get_legend_handles_labels()
handles.append(Line2D([0], [0], marker='*', color='w', markerfacecolor='red', markersize=13))
labels.append('insignificant (p_val > {})'.format(min_p_val))
ax.legend(handles, labels)
fig.tight_layout()
plt.show()