-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNeuralNetworkTester.py
163 lines (131 loc) · 7.19 KB
/
NeuralNetworkTester.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# Dependencies
import ntpath
import numpy as np
import scipy.stats
from CSVFileDelegate import CSVFileDelegate
from NeuralNetworkClassifier import NeuralNetworkClassifier
from ResultPlotter import ResultPlotter
class NeuralNetworkTester(object):
    """Hyper-parameter sweep harness for NeuralNetworkClassifier.

    Repeatedly trains and tests the network over grids of training-iteration
    counts, hidden-layer sizes and learning rates, averages the accuracy over
    several independent runs, and plots the results via ResultPlotter.
    """

    # Number of independent train/test repetitions averaged per setting.
    REPEATS = 10

    def __init__(self, filepath):
        # filepath: path to the CSV dataset the experiments run on.
        self.filepath = filepath
        # Hyper-parameter grids swept by the test methods below.
        self.training_iterations_list = [100, 300, 500, 750, 1000, 2000, 3000, 4000, 5000]
        self.hidden_layer_sizes = range(1, 31, 1)
        self.learning_rates = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]
        # Accuracy of always predicting the majority class -- plotted as a baseline.
        self.baseline_accuracy = self.calculate_accuracy_baseline(self.filepath)

    def test_network_and_plot_results(self):
        """Sweep hidden-layer size for each iteration count and plot accuracy curves."""
        accuracy_results = []
        # Fix: previously 'plotter' was only bound inside the loop, so the
        # reference after the loop raised NameError when the grid was empty.
        plotter = None
        for training_iterations in self.training_iterations_list:
            print("Testing network for " + str(training_iterations) + " iterations")
            # Mean accuracy and standard error per hidden-layer size.
            mean_accuracies = []
            standard_errors = []
            for hidden_layer_size in self.hidden_layer_sizes:
                accuracy_cache = []
                print("Testing network with hidden layer size: " + str(hidden_layer_size))
                # Average over REPEATS independent train/test runs.
                for _ in range(self.REPEATS):
                    # Re-import the dataset so each run gets a fresh split.
                    csv_delegate = CSVFileDelegate(self.filepath)
                    # Train a classifier on this run's training split.
                    neural_network_classifier = NeuralNetworkClassifier(csv_delegate.training_data,
                                                                        csv_delegate.training_target)
                    neural_network_classifier.build_network(hidden_layer_size, training_iterations)
                    # Classify the held-out testing split and score it.
                    results = neural_network_classifier.classify_set(csv_delegate.testing_data)
                    accuracy = self.compare_result_to_target(results, csv_delegate.testing_target)
                    accuracy_cache.append(accuracy)
                    print(accuracy)
                # Fix: divide by the actual run count rather than a hard-coded 10.
                mean_accuracies.append(float(sum(accuracy_cache)) / len(accuracy_cache))
                standard_errors.append(scipy.stats.sem(accuracy_cache))
            # Plot accuracy vs hidden-layer size with standard-error bars.
            plotter = ResultPlotter(self.hidden_layer_sizes, mean_accuracies, standard_errors,
                                    training_iterations, self.baseline_accuracy,
                                    ntpath.basename(self.filepath))
            plotter.generate_plot_with_errors()
            accuracy_results.append(mean_accuracies)
        if plotter:
            plotter.generate_combined_plot(self.hidden_layer_sizes, accuracy_results,
                                           self.training_iterations_list)

    def test_learning_rates(self, filepath="Datasets/owls15.csv"):
        """Sweep learning rates (fixed topology: 3 hidden nodes, 2000 iterations).

        filepath defaults to the previously hard-coded dataset path so existing
        callers are unaffected, but any dataset can now be supplied.
        """
        mean_accuracies = []
        standard_errors = []
        best_mean_accuracy = 0.0
        best_learning_rate = 0.0
        for learning_rate in self.learning_rates:
            accuracy_cache = []
            print("Testing network with learning rate: " + str(learning_rate))
            # Average over REPEATS independent train/test runs.
            for _ in range(self.REPEATS):
                # Re-import the dataset so each run gets a fresh split.
                csv_delegate = CSVFileDelegate(filepath)
                neural_network_classifier = NeuralNetworkClassifier(csv_delegate.training_data,
                                                                    csv_delegate.training_target)
                neural_network_classifier.build_network(3, 2000, learning_rate)
                results = neural_network_classifier.classify_set(csv_delegate.testing_data)
                accuracy = self.compare_result_to_target(results, csv_delegate.testing_target)
                accuracy_cache.append(accuracy)
                print(accuracy)
            # Fix: divide by the actual run count rather than a hard-coded 10.
            mean_accuracy = float(sum(accuracy_cache)) / len(accuracy_cache)
            # Track the best-performing learning rate seen so far.
            if mean_accuracy > best_mean_accuracy:
                best_mean_accuracy = mean_accuracy
                best_learning_rate = learning_rate
            mean_accuracies.append(mean_accuracy)
            standard_errors.append(scipy.stats.sem(accuracy_cache))
        print("Best learning rate = " + str(best_learning_rate))
        plotter = ResultPlotter(self.learning_rates, mean_accuracies, standard_errors, 0, 0, filepath)
        plotter.generate_learning_rate_plot_with_errors(self.learning_rates, mean_accuracies, standard_errors)

    # Utility Methods
    @staticmethod
    def calculate_accuracy_baseline(filename):
        """Return the accuracy of always predicting the most frequent class.

        filename: path to the CSV dataset to analyse.
        """
        # Import the full dataset and separate targets from inputs.
        csv_delegate = CSVFileDelegate(filename)
        data, target = csv_delegate.split_data_and_target_lists(csv_delegate.full_data_list)
        # Count occurrences of each target class.
        target_classes = {}
        for entry in target:
            if entry not in target_classes:
                target_classes[entry] = 1
            else:
                target_classes[entry] += 1
        # Find the size of the largest class.
        max_count = 0
        for target_class in target_classes:
            if target_classes[target_class] > max_count:
                max_count = target_classes[target_class]
        # Majority-class frequency as a fraction of the dataset.
        return float(max_count) / float(len(target))

    def compare_result_to_target(self, result, target):
        """Return the fraction of entries in result that match target.

        Categorical targets are compared for exact equality; numerical
        targets count as a match when within 1.0 of the true value.
        result, target: equal-length sequences of predictions / true values.
        """
        if self.target_is_categorical(target):
            match_count = np.sum(np.array(result) == np.array(target))
            match_percentage = float(match_count) / len(result)
        else:
            match_count = 0.0
            for index in range(len(result)):
                # Numerical predictions within 1.0 of the target count as correct.
                if abs(float(result[index]) - float(target[index])) < 1.0:
                    match_count += 1.0
            match_percentage = match_count / len(result)
        return match_percentage

    @staticmethod
    def target_is_categorical(target):
        """Return True if the target values are categorical rather than numerical."""
        result = True
        # Fix: narrowed the bare 'except:' (which also swallowed SystemExit and
        # KeyboardInterrupt) to the cases that can actually occur here --
        # non-numeric strings, non-castable types, or an empty sequence.
        try:
            float(target[0])
            result = False
        except (ValueError, TypeError, IndexError):
            pass
        return result