-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpoliticians.py
39 lines (31 loc) · 1.18 KB
/
politicians.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
"""
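Scrape politicians' Twitter accounts from tweetcongress.org.

Writes one Python module per party (./D.py and ./R.py), each defining a list
of the Twitter handles found for that party.
Written before the author knew of the Sunlight Labs API.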
"""
import urllib2
from BeautifulSoup import BeautifulSoup
URL = "http://tweetcongress.org/members/index"

# Listing pages 1..LAST_PAGE are fetched for every party (this was previously
# the hard-coded range(1, 50)).
LAST_PAGE = 49

# For each party: collect the handles shown on the listing pages, report how
# many were found, then write them to ./<party>.py as a Python list literal.
for party in ["D", "R"]:
    accounts = set()

    # Access the URL, parse the HTML using Beautiful Soup.
    for page in range(1, LAST_PAGE + 1):
        response = urllib2.urlopen('%s/page:%d/party:%s' % (URL, page, party))
        try:
            soup = BeautifulSoup(response)
        finally:
            # Release the connection even if parsing fails.
            response.close()

        for tag in soup.findAll('p'):
            # Only <p class="tweetTitle"> elements are of interest; .get()
            # yields None for paragraphs without a class attribute, and an
            # empty paragraph has no text to read a handle from.
            if tag.get('class') != "tweetTitle" or not tag.contents:
                continue
            contents = tag.contents[0].strip()
            at = contents.find('@')
            # find() returns -1 when there is no '@'; slicing from -1 would
            # record the last character of the text as a bogus handle.
            if at < 0:
                continue
            accounts.add(contents[at:])

    # How many legislators did I find?
    # (Parenthesised form is valid under both Python 2 and Python 3.)
    print(len(accounts))

    # Save their Twitter accounts to a new file, formatted as a list.
    # Joining the quoted handles avoids a trailing empty-string element, and
    # sorting makes the generated file stable between runs.
    with open('./%s.py' % party, 'w') as f:
        f.write('#!/usr/bin/python\n\n')
        quoted = ", ".join("'%s'" % account for account in sorted(accounts))
        f.write("%s = [%s]" % (party, quoted))