# analyzer.py
import re

import nltk
from nltk.probability import FreqDist
from nltk.sentiment.vader import SentimentIntensityAnalyzer
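
# Note: vader_analyze() relies on NLTK's VADER lexicon, which is a separate
# download. If it is missing, a one-time setup step like this is needed:
#
#     import nltk
#     nltk.download("vader_lexicon")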

class Analyzer:
    """Implements sentiment analysis."""

    def __init__(self):
        """Initialize Analyzer by loading the positive and negative word lists."""
        # Store lexicon words in sets for O(1) membership checks.
        self.positives = set()
        self.negatives = set()
        # https://stackoverflow.com/questions/15778747/clarifications-on-the-re-findall-method-in-python
        with open("positive-words.txt", "r") as lines:
            for line in lines:
                self.positives.update(re.findall(r"[\w]+|[.,!?;]", line.rstrip()))
        with open("negative-words.txt", "r") as lines:
            for line in lines:
                self.negatives.update(re.findall(r"[\w]+|[.,!?;]", line.rstrip()))

    def analyze(self, text):
        """Analyze text for sentiment, returning its score."""
        # TweetTokenizer handles hashtags, mentions, and emoticons sensibly.
        tokenizer = nltk.tokenize.TweetTokenizer()
        tokens = tokenizer.tokenize(text.lower())
        score = 0
        # https://stackoverflow.com/questions/8275417/check-substring-match-of-a-word-in-a-list-of-words
        # Count +1 for each positive token and -1 for each negative token.
        for token in tokens:
            if token in self.positives:
                score += 1
            if token in self.negatives:
                score -= 1
        return score

    def vader_analyze(self, text):
        """Score text with NLTK's VADER analyzer, returning its compound score in [-1, 1]."""
        vader = SentimentIntensityAnalyzer()
        score = vader.polarity_scores(text)
        return score["compound"]

    def frequency(self, text):
        """Return the 50 most common tokens in text (an iterable of tokens)."""
        fdist1 = FreqDist(text)
        top_50 = fdist1.most_common(50)
        return top_50

    def animal_frequency(self, input_list, raw_text):
        """Return the 50 most common animal words in raw_text (an iterable of tokens)."""
        animals = [word.lower() for word in input_list]
        # http://www.nltk.org/howto/stem.html
        stemmer = nltk.PorterStemmer()
        # Stem both the text and the animal names so that plurals and other
        # inflected forms (e.g. "cats" -> "cat") still match.
        animal_stems = {stemmer.stem(word) for word in animals}
        singles = [stemmer.stem(word) for word in raw_text]
        animal_text = [word for word in singles if word in animal_stems]
        fdist1 = FreqDist(animal_text)
        top_50 = fdist1.most_common(50)
        return top_50
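

# A minimal usage sketch, not part of the original module: it assumes that
# positive-words.txt and negative-words.txt sit next to this file and that the
# VADER lexicon has been downloaded; the sample sentence and animal list below
# are made-up examples.
if __name__ == "__main__":
    analyzer = Analyzer()
    sample = "I love my happy dog, but I hate rainy days."
    print("lexicon score:", analyzer.analyze(sample))       # word-list score
    print("vader score:", analyzer.vader_analyze(sample))   # compound score in [-1, 1]
    tokens = nltk.tokenize.TweetTokenizer().tokenize(sample.lower())
    print("top tokens:", analyzer.frequency(tokens)[:5])
    print("animal counts:", analyzer.animal_frequency(["dog", "cat"], tokens))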