-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraph.py
72 lines (61 loc) · 2.13 KB
/
graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Pipeline
from requests_futures.sessions import FuturesSession
from collections import Counter
import requests
import json
import operator
# Value sent as the `client_id` query parameter of every URL built by submitRequest.
# NOTE(review): this credential is hard-coded in source control; consider
# loading it from configuration instead.
MY_CLIENT_ID = "b45b1aa10f1ac2941910a7f0d10f8e28"
# Shared session used by submitRequest for every GET; created with max_workers=20.
session = FuturesSession(max_workers=20) #empirically determined best on my machine
def submitRequest(songId, page):
    """Submit the request for one page of playlists containing a track.

    Args:
        songId: Track id, as a string or an integer. It is formatted into
            the URL, so ids collected from API responses (``track["id"]``)
            can be passed back in without converting them first.
        page: Zero-based page index; the request offset is ``page * 5``.

    Returns:
        Whatever ``session.get`` returns for the built URL.
    """
    # limit and offset must use the same page size, otherwise pages would
    # overlap or leave gaps.
    pageSize = 5
    requestUrl = (
        "http://api.soundcloud.com/tracks/%s/playlists"
        "?client_id=%s&limit=%s&linked_partitioning=1&offset=%s"
        % (songId, MY_CLIENT_ID, pageSize, page * pageSize)
    )
    print("Page" + str(page))
    return session.get(requestUrl)
def batchRequests(id, batchSize, batchNum):
    """Submit one batch of consecutive page requests for a track.

    Batch ``batchNum`` covers pages ``batchNum * batchSize`` up to, but not
    including, ``(batchNum + 1) * batchSize``.

    Returns:
        A list with the return value of submitRequest for each page, in
        page order.
    """
    firstPage = batchNum * batchSize
    # submit async
    return [
        submitRequest(id, page)
        for page in range(firstPage, firstPage + batchSize)
    ]
def batchProcess(responseArray, processedArray):
    """Resolve a batch of pending responses and collect their track ids.

    Args:
        responseArray: Iterable of objects exposing ``result()``; each result
            must provide ``status_code`` and ``json()``.
        processedArray: List that receives the track ids of every page that
            contains a ``"collection"`` key (filled via processResponse).

    Returns:
        False as soon as a successfully decoded page has no ``"next_href"``
        key (the remaining responses of the batch are not examined).
        True otherwise, including when every response was skipped.
    """
    for index, pending in enumerate(responseArray):
        print("processing response: " + str(index))
        # soundcloud thinks im ddosing :)
        try:
            result = pending.result()
        except Exception as exc:
            # Report the exception type and skip this page. Catching
            # Exception (not a bare except) lets Ctrl-C still abort the run.
            print("Unexpected error: " + str(type(exc)))
            continue
        # sometimes connections drop
        if result.status_code != 200:
            print("failed")
            continue
        try:
            rJson = result.json()
        except ValueError:
            # A 200 response whose body is not valid JSON is treated like
            # any other failed page instead of aborting the whole crawl.
            print("failed")
            continue
        if "collection" in rJson:
            processResponse(rJson, processedArray)
        if "next_href" not in rJson:
            return False
    return True
def processResponse(rJson, processedArray):
    """Append the id of every track in the response to processedArray.

    Walks each entry of ``rJson["collection"]`` and, within it, each entry
    of its ``"tracks"`` list, in order. Duplicate ids are kept.
    """
    record = processedArray.append
    for playlist in rJson["collection"]:
        for entry in playlist["tracks"]:
            record(entry["id"])
def crawlAllPlaylists(id):
    """Collect track ids from every playlist page fetched for a track.

    Pages are requested in batches of 50 through batchRequests and handed
    to batchProcess. Crawling stops only once batchProcess returns a falsy
    value for a batch.

    Returns:
        The list of collected track ids, duplicates included.
    """
    collected = []
    batchIndex = 0
    while True:
        pending = batchRequests(id, 50, batchIndex)
        hasMore = batchProcess(pending, collected)
        batchIndex += 1
        if not hasMore:
            break
    return collected
def getRankings(id):
    """Rank track ids by how often they were collected for a track.

    Returns:
        A list of ``(track_id, count)`` tuples ordered from the highest
        count to the lowest, built from the ids crawlAllPlaylists returns.
    """
    tally = Counter(crawlAllPlaylists(id))
    # most_common() with no argument returns every entry, highest count first.
    return tally.most_common()