forked from xjr7670/QQzone_crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_moods.py
120 lines (95 loc) · 3.77 KB
/
get_moods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import requests
import os
import sys
import time
import util
class Get_moods(object):
    '''Download the paged "moods" responses of a QQ account and store them.

    Each response body is written unmodified to
    ``mood_result/<qqnumber>/<pos>``.  Requests go through one
    ``requests.Session`` and use the headers taken from the project's
    ``util`` module.
    '''

    # Amount added to the ``pos`` query argument between two requests.
    PAGE_STEP = 20
    # Seconds to pause between two consecutive page requests.
    REQUEST_INTERVAL = 5
    # Seconds to wait on the server before a request is abandoned; without a
    # timeout a stalled connection would block the crawler forever.
    REQUEST_TIMEOUT = 30
    # File that receives the status messages written by this class.
    LOG_FILE = 'crawler_log.log'

    def __init__(self):
        self.session = requests.Session()
        # NOTE(review): this is the same object as util.headers (no copy), so
        # the Referer assigned in get_moods() is visible through util.headers
        # as well. Confirm that sharing is intended before copying it here.
        self.headers = util.headers
        # Stored on the instance; not read by any method of this class.
        self.g_tk = util.g_tk

    def _log(self, message):
        '''Append *message* to the crawler log file.'''
        with open(self.LOG_FILE, 'a') as log_file:
            log_file.write(message)

    def get_moods(self, qqnumber):
        '''Fetch the mood pages of *qqnumber* and save each raw response.

        Pages are requested with ``pos`` = 0, PAGE_STEP, 2 * PAGE_STEP, ...
        and every response body is written to a file named after its
        ``pos`` inside ``mood_result/<qqnumber>``.  Paging stops as soon as
        a response contains ``"msglist":null`` or ``"msgnum":0``; the second
        case is also recorded in the log as an inaccessible account.

        Args:
            qqnumber: the QQ number as a string (it is concatenated into
                URLs and file paths).

        Raises:
            SystemExit: when a response contains ``"subcode":-4001``
                (expired cookie); the event is logged first.
            requests.exceptions.RequestException: propagated from the HTTP
                request, including a timeout after REQUEST_TIMEOUT seconds.
        '''
        self.headers['Referer'] = 'http://user.qzone.qq.com/' + qqnumber

        # Folder named after the QQ number that holds its result files.
        result_dir = 'mood_result/' + qqnumber
        util.check_path(result_dir)

        # Target URL without the position argument.
        url_base = util.parse_moods_url(qqnumber)

        pos = 0
        while True:
            print("\tDealing with position:\t%d" % pos)
            url = url_base + "&pos=%d" % pos
            print(url)
            res = self.session.get(url, headers=self.headers,
                                   timeout=self.REQUEST_TIMEOUT)
            con = res.text

            # NOTE(review): written with the platform default encoding;
            # confirm what the readers of mood_result expect before pinning
            # an explicit encoding here.
            with open(result_dir + '/' + str(pos), 'w') as f:
                f.write(con)

            # All three checks are evaluated for every response, in this
            # order, so a page that ends the paging is still inspected for
            # the other two markers.
            finished = '"msglist":null' in con

            # Cannot access...
            if '"msgnum":0' in con:
                self._log("%s Cannot access..\n" % qqnumber)
                finished = True

            # Cookie expired
            if '"subcode":-4001' in con:
                self._log('Cookie Expried! Time is %s\n' % time.ctime())
                sys.exit()

            if finished:
                # Leave before the pause: nothing else will be requested, so
                # sleeping here would only delay the caller.
                break

            pos += self.PAGE_STEP
            time.sleep(self.REQUEST_INTERVAL)
# The method below exists only for my own use: it fetches the moods of
# friends whose data has not been downloaded yet.
#
#def get_rest_number(self):
# exists_number = os.listdir('mood_result')
# with open('qqnumber_backup.inc') as f:
# con = f.read()
# con = eval(con)
# for item in con:
# qq = item['data']
# if qq not in exists_number:
# print("Dealing with:\t%s" % qq)
# self.get_moods(qq)
# else:
# print('Finish!')
class Get_moods_start(object):
    '''Driver that downloads the moods of every account listed in qqnumber.inc.'''

    # Files read and written by this driver.
    QQNUMBER_FILE = 'qqnumber.inc'
    LOG_FILE = 'crawler_log.log'

    def __init__(self):
        print('Start to get all friend\'s mood file and save it to the mood_result folder')

    def _log(self, message):
        '''Append *message* to the crawler log file.'''
        with open(self.LOG_FILE, 'a') as log_file:
            log_file.write(message)

    def get_moods_start(self):
        '''Process the accounts stored in qqnumber.inc, last entry first.

        The file is expected to hold the text of a Python list whose items
        are mappings with a ``'data'`` key containing the QQ number.  For
        every item a log line is written and ``Get_moods.get_moods`` is
        called.

        Error handling per item:
          * ``KeyboardInterrupt`` prints a message and exits the program.
          * Any other ``Exception`` rewrites qqnumber.inc with the list as
            it was before the failing item was removed, logs the exception,
            and continues with the next item.

        NOTE(review): qqnumber.inc is rewritten only on failure.  Items that
        succeed after a failure stay in the file, and a later failure
        overwrites the file with a shorter list that no longer contains the
        earlier failed item.  Confirm whether that is the intended resume
        behaviour.
        '''
        app = Get_moods()
        #app.get_rest_number()

        with open(self.QQNUMBER_FILE) as qnumber_file:
            qnumber_string = qnumber_file.read()
        # SECURITY NOTE: eval() executes any expression found in the file.
        # If qqnumber.inc only ever contains Python literals,
        # ast.literal_eval would be the safe replacement; confirm against
        # the code that produces the file before switching.
        qnumber_list = eval(qnumber_string)

        # Check that the mood_result folder for the result files exists;
        # the helper is expected to create it if it does not.
        util.check_path('mood_result')

        while qnumber_list:
            # Snapshot taken before the pop, so it still includes the item
            # that is about to be processed.
            save_back_qnumber = qnumber_list[:]
            item = qnumber_list.pop()
            qq = item['data']
            print("Dealing with:\t%s" % qq)

            start_time = time.ctime()
            self._log("Program run at: %s\tGetting %s data...\n" % (start_time, qq))

            try:
                app.get_moods(qq)
            except KeyboardInterrupt:
                print('User Interrupt, program will exit')
                sys.exit()
            except Exception as e:
                # Write the remaining items (current one included) back to
                # qqnumber.inc.
                with open(self.QQNUMBER_FILE, 'w') as qnumber_file:
                    qnumber_file.write(str(save_back_qnumber))

                # Write the log
                exception_time = time.ctime()
                self._log("Exception occured: %s\n%s\n" % (exception_time, e))
            else:
                print("%s Finish!" % qq)

        # The loop has no break, so this always runs once the list is empty.
        print("Finish All!")