MLP.py.1
# MLP.py.1: trains a classifier (an SVC here) on precomputed article-pair features,
# predicts which article of each pair came first, and records the predicted
# ordering in the graph G.
import sys
#reload(sys)
#sys.setdefaultencoding('utf-8')
import app.parser.getData as importArticles
import app.parser.articleRetrieval.getArticles as getContent
import app.parser.sentences as sent
import app.analytics.features as fe
from sklearn import tree, feature_extraction, svm, linear_model, neural_network
from sklearn.feature_extraction.text import CountVectorizer
from multiprocessing import Pool
import numpy as np
import datetime
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
import app.analytics.filterSentences as fl
import networkx as nx
import matplotlib.pyplot as plt

G = {}  # nx.DiGraph(); adjacency lists recording the predicted ordering of article pairs
H = {}  # article id -> ground-truth label of the pair it came from
np.seterr(divide='ignore', invalid='ignore')
listOfYears = []
clf = svm.SVC(probability=True)
#clf = tree.DecisionTreeClassifier()
probs = []
titles = []
trainData = eval(open('titleOnlyDouble', 'r').readlines()[0])
testData = open('titleOnlyDoubleTest', 'r').readlines()
B = None
#C
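# train(features): each item appears to be (feature_vector, (article_a, article_b), label, ...).
# It builds X from the first B values of every feature vector (B = length of the
# shortest vector), builds Y from the labels, fits the shared classifier clf,
# and returns B so test() can truncate its vectors to the same width.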
def train(features):
    features = [item for item in features if len(item[0]) != 0]
    feats = [item[0] for item in features]
    A = len(features)
    B = min(map(len, feats))
    X = np.ones((A, B))
    Y = np.ones(A)
    for feature in range(len(features)):
        Y[feature] = features[feature][2]  # label
        for item in range(0, B):
            X[feature][item] = features[feature][0][item]
    clf.fit(X, Y)
    return B
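# test(features, B): for each pair, truncate the feature vector to width B,
# predict the ordering with clf, record it as adjacency lists in G (and the
# pair's label in H), then print a confusion matrix, raw accuracy, and the
# counts of each predicted class. Pairs with fewer than 22 feature values are skipped.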
def test(features, B):
    pre1 = 0
    pre2 = 0
    correct = 0
    probs = []
    labels = []
    predictions = []
    features = [item for item in features if len(item[0]) != 0]
    for feature in features:
        print(len(feature[0]))
        if len(feature[0]) < 22:
            continue
        temp = np.array(feature[0][0:B]).reshape((1, -1))
        predict = clf.predict(temp[0][0:B].reshape((1, -1)))
        #prob = max(clf.predict_proba(temp)[0])
        #print (prob)
        # add all articles to G
        if feature[1][1] not in G.keys():
            G.update({feature[1][1]: []})
            H.update({feature[1][1]: feature[2]})
        if feature[1][0] not in G.keys():
            G.update({feature[1][0]: []})
            H.update({feature[1][0]: feature[2]})
        # Add list of which came before what
        if predict[0] == 1:
            G[feature[1][0]].append(feature[1][1])
            labels.append(1)
            predictions.append(feature[-1])
            pre1 += 1
        else:
            G[feature[1][1]].append(feature[1][0])
            labels.append(0)
            predictions.append(feature[-1])
            pre2 += 1
        probs.append([predict, feature[2]])
        if feature[2] == predict[0]:
            correct += 1
    print(confusion_matrix(labels, predictions))
    print("Accuracy = " + str(correct) + '/' + str(len(features)))
    print(pre1)
    print(pre2)
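# Driver: take the first 100 items of trainData as the training set, fit the
# classifier, then evaluate on testData. The Pool(20) below is created but
# never used in this script.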
print(datetime.datetime.now())
p = Pool(20)
trainFeatures = []
for I in range(100):  # len(trainData)):
    if trainData[I] is not None:
        trainFeatures.append(trainData[I])
B = train(trainFeatures)
print(datetime.datetime.now())
#train(generateDataPoints(trainArticles))
print("Training Complete. Now For Testing")
testFeatures = []
testData = eval(testData[0])
for I in range(len(testData)):
    if testData[I] is not None:
        testFeatures.append(testData[I])
test(testFeatures, B)
print(G)
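# The unused networkx/matplotlib imports and the commented-out nx.DiGraph()
# above suggest G was meant to be visualised. A minimal, optional sketch of
# that step (assumes the keys of G are hashable article identifiers):
#
#   D = nx.DiGraph()
#   for article, successors in G.items():
#       D.add_node(article)
#       D.add_edges_from((article, s) for s in successors)
#   nx.draw(D, node_size=20)
#   plt.savefig('ordering_graph.png')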