-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmodel_comparison.py
90 lines (80 loc) · 3.55 KB
/
model_comparison.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import model_vanilla_nn as vanillaModule
import model_cnn as cnnModule
from preprocessing_vanilla_nn import preprocess_all
from preprocess_cnn import convert_urls_to_vector
import numpy as np
import matplotlib.pyplot as plt
import csv
def _write_accuracy_row(path, accuracies):
    """Write *accuracies* as a single CSV row to *path*, overwriting the file."""
    # newline='' leaves line-ending handling to the csv module.
    with open(path, 'w', newline='') as results_file:
        csv.writer(results_file).writerow(accuracies)


def _plot_accuracy_by_epoch(vanilla_accuracies, cnn_accuracies):
    """Plot both models' per-epoch test accuracy on one figure and show it."""
    # Derive the x-axis from the data so the plot works for any epoch count.
    epochs = range(len(vanilla_accuracies))

    _, ax = plt.subplots()
    ax.plot(epochs, vanilla_accuracies, 'bo', label='Vanilla NN')
    ax.plot(epochs, cnn_accuracies, 'r+', label='CNN')
    ax.set_xlabel('Epoch Number')
    ax.set_ylabel('Accuracy')
    ax.set_title('Accuracy by epochs')
    ax.legend()
    # Anchor the y-axis at zero so both curves are read on an absolute scale.
    ax.set_ylim(bottom=0.0)
    plt.show()


def main():
    """Train the vanilla NN and the CNN side by side and compare test accuracy.

    Each model is trained ``model_1.epochs`` times on its own training set and
    evaluated on its own test set after every training call.  The per-epoch
    accuracies are written to ``vanilla_nn_results.csv`` and
    ``cnn_results.csv`` (one row each) and plotted against the epoch number.
    """
    # Input files; entry i of each list is phishing iff is_phishing[i] is True.
    csv_files_vanilla = ["dataset/results-phishing_url.csv", "dataset/results-cc_1_first_9617_urls.csv"]
    # NOTE(review): the second path has no ".txt" extension, unlike the first
    # -- confirm it matches the file on disk.
    file_names_cnn = ["dataset/phishing_url.txt", "dataset/cc_1_first_9617_urls"]
    is_phishing = [True, False]

    # Training and testing inputs and labels for the vanilla NN.
    train_data_vanilla, train_labels_vanilla, test_data_vanilla, test_labels_vanilla = preprocess_all(
        csv_files_vanilla, is_phishing)

    # Training and testing inputs and labels for the CNN.
    train_data_cnn, train_labels_cnn, test_data_cnn, test_labels_cnn, vocabulary = convert_urls_to_vector(
        file_names_cnn, is_phishing)

    model_1 = vanillaModule.Model()
    model_1_accuracies = []
    model_2_accuracies = []

    # NOTE: UNCOMMENT THE FOLLOWING (AND DISABLE THE ACCURACY-BY-EPOCH RUN
    # BELOW) IF YOU WANT ACCURACY BY KERNEL SIZE FOR THE CNN MODEL.
    # kernel_sizes = range(3,7)
    # for kernel_size in kernel_sizes:
    #     model_2 = cnnModule.Model(len(vocabulary),kernel_size)
    #     for i in range(0, 10):
    #         cnnModule.train(model_2, train_data_cnn, train_labels_cnn)
    #     accuracy_cnn = cnnModule.test(model_2, test_data_cnn, test_labels_cnn)
    #     model_2_accuracies.append(accuracy_cnn.numpy())
    # plt.scatter(kernel_sizes, model_2_accuracies, edgecolors='r')
    # plt.xlabel('Kernel Sizes')
    # plt.ylabel('CNN Model Accuracy')
    # plt.title('Accuracy by Kernel Sizes')
    # plt.show()

    # Accuracy by epoch for both models; the CNN uses a kernel size of 5.
    model_2 = cnnModule.Model(len(vocabulary), 5)
    for _ in range(model_1.epochs):
        vanillaModule.train(model_1, train_data_vanilla, train_labels_vanilla)
        cnnModule.train(model_2, train_data_cnn, train_labels_cnn)

        # NOTE(review): the values returned by test() are stored, written and
        # plotted as-is; if they are framework tensors rather than plain
        # floats, the CSV will contain their string form -- confirm.
        model_1_accuracies.append(
            vanillaModule.test(model_1, test_data_vanilla, test_labels_vanilla))
        model_2_accuracies.append(
            cnnModule.test(model_2, test_data_cnn, test_labels_cnn))

    # Report the final-epoch accuracy; skipped when no epoch was run, because
    # there is nothing to report in that case.
    if model_1_accuracies:
        print("Accuracy using Vanilla NN:", model_1_accuracies[-1])
        print("Accuracy using CNN: ", model_2_accuracies[-1])

    _write_accuracy_row("vanilla_nn_results.csv", model_1_accuracies)
    _write_accuracy_row("cnn_results.csv", model_2_accuracies)

    _plot_accuracy_by_epoch(model_1_accuracies, model_2_accuracies)


# Run the comparison only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()