-
Notifications
You must be signed in to change notification settings - Fork 0
/
predict.py
92 lines (71 loc) · 2.76 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import re
import pickle
# from emotions import create_feature
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction import DictVectorizer
import joblib
def ngram(token, n):
output = []
for i in range(n-1, len(token)):
ngram = ' '.join(token[i-n+1:i+1])
output.append(ngram)
return output
def create_feature(text, nrange=(1, 1)):
text_features = []
text = text.lower()
text_alphanum = re.sub('[^a-z0-9#]', ' ', text)
for n in range(nrange[0], nrange[1]+1):
text_features += ngram(text_alphanum.split(), n)
text_punc = re.sub('[a-z0-9]', ' ', text)
text_features += ngram(text_punc.split(), 1)
return Counter(text_features)
print("\n\n***** Welcome to Emotion Predictor! *****\n\n")
dummy_sentences = [
"This looks so impressive",
"I have a fear of dogs",
"My dog died yesterday",
"I don't love you anymore..!",
"Go kill yourself you piece of shit!"
]
num_dummy_sentences = len(dummy_sentences)
for x in range(num_dummy_sentences):
print(x, dummy_sentences[x])
num_sentences = input("How many additional sentences would you like to predict the emotion for? ")
if num_sentences.isdigit():
num_sentences = int(num_sentences)
if (num_sentences > max_sentences):
print("Error!", num_sentences, "is more than the maximum number of sentences:", max_sentences)
quit()
else:
print("Error:", num_sentences, "is not an integer.")
quit()
all_sentences = dummy_sentences.copy()
for x in range(num_sentences):
sentence_num = num_dummy_sentences + x
sent = str(input("Enter sentence # " + str(sentence_num) + " : "))
all_sentences.append(sent)
num_all_sentences = len(all_sentences)
print("\nWe will be predicting emotion for each of the following", num_all_sentences, "sentences...\n")
for x in range(num_all_sentences):
print(x, all_sentences[x])
print("\nPredicting emotion for each of the", num_all_sentences, "sentences...\n")
with open('saved_model', 'rb') as f:
clf = pickle.load(f)
emoji_dict = {"joy":"😂", "fear":"😱", "anger":"😠", "sadness":"😢", "disgust":"😒", "shame":"😳", "guilt":"😳"}
print("Input four sentences to predict the emotion of each:")
vectorizer = DictVectorizer(sparse = True)
def predict_emotion(txt):
features = create_feature(txt, nrange=(1, 4))
features = vectorizer.transform(features)
prediction = clf.predict(features)[0]
print(txt,emoji_dict[prediction])
for text in all_sentences:
predict_emotion(text)
print("Done\a")
# quit()