-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathModel.py
126 lines (84 loc) · 3.27 KB
/
Model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn import linear_model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
## Data import and separation
def import_Data(path='Disease_Data_BiGram.csv'):
    """Load the dataset CSV and split it into features and label columns.

    The file is assumed to end with a 'Subject' column (sub-class label)
    followed by a 'Class' column (super-class label); every earlier
    column is a feature.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to the original hard-coded filename,
        so existing callers are unaffected.

    Returns
    -------
    X : DataFrame
        Every column except the final 'Class' column — i.e. the features
        plus 'Subject', which downstream sub-class code reads from X.
    Y : Series
        Super-class labels (the 'Class' column).
    Y_ : Series
        Sub-class labels (the 'Subject' column).
    numOfFeatures : int
        Number of pure feature columns (total columns minus the two labels).
    """
    Data = pd.read_csv(path)
    # Last two columns are labels ('Subject', 'Class'); the rest are features.
    numOfFeatures = Data.shape[1] - 2
    # Keep 'Subject' inside X on purpose: sub_predict() uses X_test['Subject'].
    X = Data.iloc[:, 0:Data.shape[1] - 1]
    Y = Data['Class']
    Y_ = Data['Subject']
    return X, Y, Y_, numOfFeatures
## Splitting into test and training data
def split_class_Data(X, Y):
    """Partition the rows of X by their super-class label.

    Parameters
    ----------
    X : DataFrame of samples.
    Y : Series of class labels aligned with X's rows.

    Returns
    -------
    dict mapping each distinct label in Y to the sub-DataFrame of X
    whose rows carry that label.
    """
    return {label: X[Y == label] for label in set(Y)}
def split_Data(X, Y):
    """Split (X, Y) into 60% training / 40% test sets.

    random_state=0 keeps the split reproducible across runs.
    """
    train_X, test_X, train_y, test_y = train_test_split(
        X, Y, test_size=0.4, random_state=0)
    return train_X, test_X, train_y, test_y
def apply_Model(X_train, y_train):
    """Fit and return a multinomial naive-Bayes classifier.

    NOTE: earlier experiments here tried linear SVM and logistic
    regression; MultinomialNB is the variant kept in use.
    """
    return MultinomialNB().fit(X_train, y_train)
def apply_sub_Model(sub_class, Y, numOfFeatures):
    """Train one sub-class (Subject) classifier per super-class.

    Parameters
    ----------
    sub_class : dict
        Maps each super-class label to the DataFrame of its rows
        (feature columns first, then a 'Subject' column).
    Y : iterable
        Super-class labels; its distinct values select which entries
        of sub_class get a model.
    numOfFeatures : int
        Count of leading feature columns in each DataFrame.

    Returns
    -------
    dict mapping super-class label -> model fitted to predict 'Subject'.
    """
    models = {}
    for label in set(Y):
        frame = sub_class[label]
        features = frame.iloc[:, 0:numOfFeatures]
        targets = frame['Subject']
        models[label] = apply_Model(features, targets)
    return models
def model_Predict(clf, X_test):
    """Return clf's predictions for X_test (thin wrapper around predict)."""
    return clf.predict(X_test)
def super_Predict(X, Y):
    """Train and evaluate the super-class model on a 60/40 split.

    X's final column is assumed to be 'Subject' and is excluded from the
    features handed to the classifier. The test-set accuracy is printed.

    Returns
    -------
    y_pred : predictions for the test rows.
    X_train, X_test, y_train, y_test : the split pieces, returned so the
        caller can reuse them for sub-class prediction.
    """
    X_train, X_test, y_train, y_test = split_Data(X, Y)
    # Drop the trailing 'Subject' column before fitting/predicting.
    feature_count = X_train.shape[1] - 1
    clf = apply_Model(X_train.iloc[:, 0:feature_count], y_train)
    y_pred = model_Predict(clf, X_test.iloc[:, 0:feature_count])
    acc = accuracy_score(y_test, y_pred)
    print("Super Class Prediction Accuracy:", acc)
    return y_pred, X_train, X_test, y_train, y_test
def sub_predict(y_pred, X_train, X_test, y_train, Y):
    """Predict the sub-class (Subject) of each test row with per-super-class models.

    For every test sample, the super-class predicted in y_pred selects
    which specialised model labels its Subject. Prints and returns the
    resulting accuracy.

    Parameters
    ----------
    y_pred : sequence of predicted super-class labels, aligned with X_test rows.
    X_train, X_test : DataFrames whose last column is 'Subject'.
    y_train : super-class labels for X_train's rows.
    Y : unused; kept so the original call signature is unchanged.

    Returns
    -------
    float : sub-class prediction accuracy (previously returned None;
        callers ignoring the return value are unaffected).
    """
    numOfFeatures = X_train.shape[1] - 1  # last column is 'Subject'
    sub_class = split_class_Data(X_train, y_train)
    sub_Models = apply_sub_Model(sub_class, y_train, numOfFeatures)
    # Accumulate into a list: np.append in a loop copies the whole array
    # each iteration (quadratic); a list + extend is linear.
    y_sub_pred = []
    # zip pairs each predicted super-class with its test row, replacing
    # the original manual counter i.
    for superCls, (index, case) in zip(y_pred, X_test.iterrows()):
        # Row Series -> single-row DataFrame restricted to feature columns.
        features = pd.DataFrame(case).iloc[0:numOfFeatures].transpose()
        y_sub_pred.extend(model_Predict(sub_Models[superCls], features))
    acc = accuracy_score(X_test['Subject'], y_sub_pred)
    print("Sub Class Prediction Accuracy:", acc)
    return acc
def main():
    """Run the two-level classification pipeline end to end.

    Prints three accuracies: hierarchical super-class, flat sub-class
    baseline, and hierarchical sub-class.
    """
    # X holds features + 'Subject'; Y = super-class, Y_ = sub-class labels.
    X, Y, Y_, numOfFeatures = import_Data()
    # Super-class model (prints its accuracy); keep the split for reuse below.
    y_pred, X_train, X_test, y_train, y_test = super_Predict(X, Y)
    # Flat baseline: predict 'Subject' directly with one model.
    # Only the printed accuracy is of interest, so the return is dropped.
    super_Predict(X, Y_)
    # Hierarchical sub-class prediction driven by the super-class output.
    sub_predict(y_pred, X_train, X_test, y_train, Y)


if __name__ == '__main__':
    # Guard so importing this module no longer triggers the full pipeline
    # (the original called main() unconditionally at import time).
    main()