twitter_usernames.py
#!/usr/local/bin/python3
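"""Import Twitter usernames from English Wikipedia into Wikidata.

Walks the tracking category 'Twitter username not in Wikidata', adds each
extracted handle as a Twitter-username claim on the article's Wikidata item,
and qualifies it with the account's numeric ID.
"""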
import re
import requests
import common
import pywikibot
from bs4 import BeautifulSoup
from __local__ import credentials
from pywikibot import pagegenerators
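
# Wikidata properties: P6552 = Twitter user numeric ID, P2002 = Twitter username.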
TWITTER_ID_PROP = 'P6552'
TWITTER_USERNAME_PROP = 'P2002'
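# Reuse a single HTTP session for all Twitter API requests.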
SESSION = requests.Session()


def doImport():
    cat = 'Twitter username not in Wikidata'
    site = pywikibot.Site('en', 'wikipedia')
    catObj = pywikibot.Category(site, cat)
    data = list()           # [username, ItemPage] pairs to write
    no_data_item = list()   # article titles that have no Wikidata item
    pages = pagegenerators.CategorizedPageGenerator(catObj, recurse=False)
    for page in pages:
        try:
            data_item = pywikibot.ItemPage.fromPage(page)
        except pywikibot.exceptions.NoPageError:
            print("Skipping %s, no data item found" % page.title())
            no_data_item.append(page.title())
            continue
        username = extractUsername(page)
        if username:
            data.append([username, data_item])
            print("Found %s for %s" % (username, page.title()))
        else:
            print(("Cannot extract Twitter username for %s. Either there's none or there "
                   "are multiple and it's unclear which one is official") % page.title())
    common.recordPages(no_data_item, 'missing-data-items-list-twitter')
    result = common.addMultipleClaims(data, TWITTER_USERNAME_PROP)
    # Resolve all numeric IDs in one batch request, then attach each as a qualifier.
    numeric_ids = getNumericIds([u for u, _ in data]) or {}
    for u, d in data:
        if d.title() in result['items']:
            num_id = numeric_ids.get(u.lower())
            if num_id:
                common.addQualifier(d.title(), TWITTER_ID_PROP, TWITTER_USERNAME_PROP, num_id)
    print('Finished. Updated %s items, %s were skipped' % (result['added'], result['skipped']))


def extractUsername(page):
    """Return the page's Twitter username, or None if absent or ambiguous.

    The original body was a stub; this is a minimal sketch that assumes the
    handle appears as a twitter.com link among the article's external links.
    """
    usernames = set()
    for url in page.extlinks():
        m = re.match(r'https?://(?:www\.)?twitter\.com/@?(\w{1,15})/?$', url)
        if m:
            usernames.add(m.group(1))
    # Only trust an unambiguous single match.
    return usernames.pop() if len(usernames) == 1 else None
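
# The v2 batch lookup endpoint (GET /2/users/by) accepts up to 100
# comma-separated usernames per request and returns id, name, and username
# for each account that exists.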
def getNumericIds(usernames):
    """Map lowercased Twitter usernames to their numeric IDs; False on failure."""
    params = {}   # separate dicts; `params = headers = {}` would alias them
    headers = {}
    params['usernames'] = ','.join(usernames)
    params['user.fields'] = ','.join(['id'])
    headers['Authorization'] = 'Bearer {}'.format(credentials.twitter['bearer_token'])
    url = 'https://api.twitter.com/2/users/by'
    response = SESSION.get(url, params=params, headers=headers)
    if response.status_code == 200:
        res = response.json()
        if 'data' in res:
            # Key by lowercased handle (handles are case-insensitive) so the
            # caller can look up the ID for the username it extracted.
            ret = dict()
            for r in res['data']:
                ret[r['username'].lower()] = r['id']
            return ret
    return False


if __name__ == '__main__':
    doImport()