-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript_hackaton.py
284 lines (168 loc) · 8.03 KB
/
script_hackaton.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#!/usr/bin/env python
# coding: utf-8
# In[23]:
#pip install tinys3
# In[30]:
#pip install -U textblob
# In[28]:
#pip install -U emoji
# In[26]:
#pip install -U wordcloud
# In[7]:
#pip install -U git+https://github.com/tweepy/tweepy.git@2efe385fc69385b57733f747ee62e6be12a1338b
# tweepy must be installed before running the imports below
# In[47]:
import tweepy
import json
import os
import pandas as pd
from collections import OrderedDict
import requests
import boto3
comprehend = boto3.client('comprehend', region_name='us-east-1')
from dotenv import load_dotenv
# In[48]:
# Smoke test of Amazon Comprehend on one hard-coded English tweet: key phrases,
# entities and overall sentiment are requested and printed.
sample_tweet = "It’s always a great day when I can randomly put my equestrian knowledge to good use at work! #AWS #BePeculiar"

# One Comprehend call per analysis type, all on the same text.
phrases = comprehend.detect_key_phrases(Text=sample_tweet, LanguageCode='en')
entities = comprehend.detect_entities(Text=sample_tweet, LanguageCode='en')
sentiments = comprehend.detect_sentiment(Text=sample_tweet, LanguageCode='en')

# Key phrases, one per line.
print('------- phrases ---------')
for key_phrase in phrases['KeyPhrases']:
    print(key_phrase['Text'])

# Entities as "<text> : <type>".
print('------- entity : entity type ---------')
for entity in entities['Entities']:
    print(f"{entity['Text']} : {entity['Type']}")

# Overall sentiment label.
print('------- sentiment ---------')
print(sentiments['Sentiment'])
# In[49]:
# Twitter API credentials.
# Each value is read from an environment variable so real secrets never have
# to be written into (and committed with) this script. The original
# placeholder strings are kept as fallbacks so the four names are always
# defined, exactly as before, when the variables are not set.
#   TWITTER_API_KEY / TWITTER_API_SECRET        -> consumer key pair
#   TWITTER_ACCESS_TOKEN / TWITTER_ACCESS_SECRET -> user access token pair
api_key = os.environ.get('TWITTER_API_KEY', 'XXXXXX')
api_secret = os.environ.get('TWITTER_API_SECRET', 'XXXXXXX')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', 'XXXXXXXXXX')
access_secret = os.environ.get('TWITTER_ACCESS_SECRET', 'XXXXXXXXXX')
# In[50]:
# Python wrapper for the Twitter API
#%%bash
#pip install tweepy
# In[51]:
# Build the OAuth handler from the consumer key pair, attach the user's access
# token pair, and wrap the handler in the tweepy API client used below.
auth = tweepy.OAuthHandler(consumer_key=api_key, consumer_secret=api_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)
# In[75]:
import csv
import emoji
# Search configuration.
# `contry` and `date_since` are used to build the CSV file name, and `contry`
# is also written into every exported row; `geocodes` is defined here but is
# not referenced anywhere else in this script.
date_since, geocodes, contry = "2020-01-01", "40.42955,-3.67930,100km", "Madrid"

# Query string sent to the Twitter search; a narrower alternative is kept
# commented out for reference.
#tag = '(bbva OR BBVA) AND (covid OR Covid-19 OR "COVID 19")'
tag = "BBVA"
#tag = '(BBVA OR bbva OR bancomer OR Bancomer OR BANCOMER OR BBV OR "banco bilbao vizcaya" OR "Banco Bilbao Vizcaya" OR Bancomer OR "Blue Bank" OR "blue bank") AND (covid OR Covid OR COVID)'
#tag = ' AND (virus OR coronavirus OR covid OR covid-19 OR "covid 19" OR pandemia OR antiviral OR N95 OR máscara OR mascarilla OR infectado OR "distanciamiento social" OR SARS-CoV-2 OR SARS OR cov OR síntomas OR "falsos positivos" OR "falso positivo" OR paciente OR pacientes OR serología OR prueba OR "crisis de salud pública" OR "crisis de salud" OR "equipo de protección personal" OR infección OR infecciones OR "riesgo de transmisión" OR desinfectante OR "máscaras faciales" OR "mascarillas faciales" OR infeccioso OR infectado OR contagiado OR infecciosos OR infectados OR contagiados OR virología OR médico OR "este difícil momento" OR sanear OR pruebas OR cuarentena OR cuarentena OR tos OR toser OR fiebre OR hidroxicloroquina OR respiratorio OR respirar OR postmortem OR inflamatoria OR inflamación OR vacuna OR enfermedad OR enfermedades OR enfermedad OR contagioso OR epidemia OR epidemiológico OR epidemiológica OR "la crisis actual" OR emergencia OR "instalaciones de salud" OR máscaras OR máscara OR desinfectante OR antiséptico OR test OR tests OR Virus OR Coronavirus OR Covid OR Covid-19 OR "Covid 19" OR Pandemia OR Antiviral OR N95 OR Máscara OR Mascarilla OR Infectado OR "Distancia social" OR SARS-CoV-2 OR SARS OR Cov OR Síntomas OR "Falsos positivos" OR "Falso positivo" OR Paciente OR Pacientes OR Serología OR Prueba OR "Crisis de salud pública" OR "Crisis de salud" OR "Equipo de protección personal" OR "Infección OR Infecciones" OR "Riesgo de transmisión" OR Desinfectante OR "Máscaras faciales" OR "Mascarillas faciales" OR Infeccioso OR Infectado OR Contagiado OR Infecciosos OR Infectados OR Contagiados OR Virología OR Médico OR "Este difícil momento" OR Sanear OR Pruebas OR "Cuarentena OR Auto-cuarentena" OR "En cuarentena" OR Cuarentena OR "distanciamiento social" OR Tos OR Toser OR Fiebre OR Hidroxicloroquina OR Respiratorio OR Respirar OR Postmortem OR Inflamatoria OR Inflamación OR Vacuna OR Enfermedad OR Enfermedades OR Enfermedad OR Contagioso OR 
#Epidemia OR Peligroso OR Peligro OR Epidemiológico OR Epidemiológica OR "La crisis actual" OR Emergencia OR "Instalaciones de salud" OR Máscaras OR Máscara OR Desinfectante OR Antiséptico OR Test OR Tests)'
#API.search(q[, geocode][, lang][, locale][, result_type][, count][, until][, since_id][, max_id][, include_entities])
# Run the search for `tag`, passing count=100.
# NOTE(review): neither `geocodes` nor `date_since` is passed to the search, so
# the results are not restricted to the region/date that the CSV file name and
# the "Contry" column suggest -- confirm whether that is intended.
tweets = api.search(q=tag, count=100)
# In[76]:
# Export one CSV row per tweet: timestamp, short day label, text with
# emoji-bearing words removed, retweet count, like count and the region label.
with open('testcsv_100_tweepy_'+contry+'_'+date_since+'.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["Date", "Day", "Tweet", "Retweets", "Likes", "Contry"])
    for tweet in tweets:
        # `text_tweet` is kept as a module-level name because a later cell
        # of this script reads it.
        text_tweet = tweet.text
        # Characters of this tweet that the emoji package lists as emoji.
        # NOTE(review): the layout of emoji.UNICODE_EMOJI differs between
        # releases of the emoji package -- verify against the installed version.
        emoji_chars = {ch for ch in text_tweet if ch in emoji.UNICODE_EMOJI}
        # Drop every whitespace-separated word that contains one of them.
        clean_text = ' '.join(
            word for word in text_tweet.split()
            if not any(ch in word for ch in emoji_chars)
        )
        created = tweet.created_at
        writer.writerow([
            # Format the full datetime: the previous code formatted
            # created_at.date(), which made %H:%M:%S always 00:00:00.
            created.strftime("%b %d %Y %H:%M:%S "),
            created.strftime("%b%d "),
            clean_text,
            tweet.retweet_count,
            tweet.favorite_count,
            contry,
        ])
# In[77]:
import pandas as pd
# Read the CSV written by the export cell back into a DataFrame; the bare
# expression on the last line displays it when run as a notebook cell.
csv_path = f"testcsv_100_tweepy_{contry}_{date_since}.csv"
data = pd.read_csv(csv_path)
data
# In[96]:
# Parallel accumulators: position k in every list describes the same tweet.
posts, timestamp, locations = [], [], []
sentiments, positive, negative, neutral = [], [], [], []

# Ask Amazon Comprehend for the sentiment of every non-empty tweet text
# (submitted with LanguageCode='es') and record the label and the three scores.
for status in tweets:
    body = status.text
    created = status.created_at
    place = status.user.location
    if body == '':
        # Tweets with empty text are skipped entirely.
        continue
    reply = comprehend.detect_sentiment(Text=body, LanguageCode='es')
    label = reply.get('Sentiment')
    scores = reply.get('SentimentScore')
    pos_score, neg_score, neu_score = scores['Positive'], scores['Negative'], scores['Neutral']
    # created_at
    timestamp.append(created)
    # text
    posts.append(body)
    # user.location
    locations.append(place)
    # sentiment label and per-class scores
    sentiments.append(label)
    positive.append(pos_score)
    negative.append(neg_score)
    neutral.append(neu_score)
# In[104]:
import pandas as pd
from collections import OrderedDict
# Assemble the per-tweet sentiment table from the parallel lists built above.
# Column order follows the order of the keys below.
result = pd.DataFrame({
    'tweets': posts,
    # Wrap long location strings at 15 characters for display. The dtype is
    # pinned to object so that `.str` also works when `locations` is empty:
    # on pandas versions where an empty Series defaults to float64 the
    # accessor would otherwise raise.
    'location': pd.Series(locations, dtype='object').str.wrap(15),
    'timestamp': timestamp,
    'sentiment': sentiments,
    'positiveScore': positive,
    'negativeScore': negative,
    'neutralScore': neutral,
})
# In[105]:
print("BBVA")
# Mean negative score per distinct tweet text, highest first. The bare
# expression is what a notebook cell displays.
negative_by_tweet = result.groupby(by='tweets')['negativeScore'].mean()
negative_by_tweet.sort_values(ascending=False)
# In[106]:
# Number of tweets per (wrapped) location string, most frequent first.
tweets_per_location = result.groupby(by='location', sort = True)['tweets'].count()
tweets_per_location.sort_values(ascending=False)
# In[117]:
from textblob import TextBlob
# TextBlob polarity of every exported tweet, in the same row order as `data`
# so it can be plotted against data['Day'].
popularity_list = []
# In[118]:
for tweet in data ['Tweet']:
    print(tweet)
    # Rows whose text is not a string are scored as empty text so the list
    # stays aligned with `data`.
    # NOTE(review): presumably these are NaN cells produced by pandas for
    # empty CSV fields -- confirm.
    text = tweet if isinstance(tweet, str) else ''
    analysis = TextBlob(text).sentiment
    print(analysis)
    popularity = analysis.polarity
    # Store the polarity value itself. The previous code appended the whole
    # sentiment result, which is not a single number and cannot be used as
    # the y values of the scatter plot.
    popularity_list.append(popularity)
# In[119]:
# Enable inline plots when running under IPython/Jupyter. `get_ipython` only
# exists there, so a plain `python script.py` run skips the magic instead of
# failing with NameError. `run_line_magic` replaces the deprecated `.magic`.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # not running under IPython; use matplotlib's default backend
import matplotlib.pyplot as plt
# In[120]:
# Scatter plot of the per-tweet polarity against the tweet's day label.
plt.figure(figsize=(20,10))
plt.scatter(data['Day'], popularity_list)
plt.title("ZYAN - Sentiments about BBVA in twitter")
plt.xlabel("Tweets")
plt.ylabel("Sentiment")
plt.show()
# In[121]:
from wordcloud import WordCloud
# Create and generate a word cloud image.
# The cloud is built from the text of every exported tweet. The previous code
# passed `text_tweet`, a leftover loop variable that holds only the last
# tweet's text and is undefined when the search returned nothing.
all_tweets_text = ' '.join(data['Tweet'].dropna().astype(str))
wordcloud = WordCloud(background_color="white", colormap="Dark2", max_font_size=150, random_state=42).generate(all_tweets_text)
# Display the generated image
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.title("BBVA + COVID 19")
plt.show()
# In[ ]:
# Recreate the API client, look up places matching "USA" at country
# granularity, and search for tweets tagged with the first match's place id.
api = tweepy.API(auth)
places = api.geo_search(query="USA", granularity="country")
place_id = places[0].id
place_query = f"place:{place_id}"
tweets = api.search(q=place_query)