proxy.py
#!/usr/bin/env python
# -*- coding:UTF-8 -*-
'''
1. Renew the proxy list if the cached data is older than 5 minutes,
   then save it to *proxy_list.json*.
2. Pick a random proxy entry from proxy_list.json and test it.
3. Test all entries.
4. Coming soon...
'''
from urllib.request import (urlopen,
                            Request,
                            ProxyHandler,
                            build_opener,
                            install_opener)
from bs4 import BeautifulSoup
import lxml  # not used directly; only needs to be installed for the "lxml" parser
import json
import os
import time
import random
'''Get the newest proxy list from free-proxy-list.net
and parse the list data into a JSON file (proxy_list.json).
'''
# Fetch the proxy list page and save the parsed entries to a JSON file.
def renew_proxy_info():
    req = Request("https://free-proxy-list.net/#list",
                  headers={'User-Agent': 'Mozilla/5.0'})
    try:
        with urlopen(req, timeout=1) as n:
            html = n.read().decode('UTF-8')
    except Exception as e:
        print('##### URLOPEN ERROR #####', e)
        return
    try:
        with open('cache.html', 'wt') as f:
            f.write(html)
    except Exception as e:
        print('##### CACHE I/O ERROR #####', e)
        return
    # Parse the proxy table from the cached HTML into proxy_list.json.
    with open("cache.html") as c:
        soup = BeautifulSoup(c, "lxml")
    table_body = soup.find('tbody')
    ips = []
    for html_tr in table_body.find_all('tr'):
        tds = html_tr.find_all('td')
        ips.append({'IP_Address_td': tds[0].string,
                    'Port_td': tds[1].string,
                    'Code_td': tds[2].string,
                    'Country_td': tds[3].string,
                    'Anonymity_td': tds[4].string,
                    'Google_td': tds[5].string,
                    'Https_td': tds[6].string,
                    'Last_Checked_td': tds[7].string})
    with open('proxy_list.json', 'wt') as f:
        f.write(json.dumps(ips))
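# Illustrative shape of one proxy_list.json record (values are hypothetical;
# only the keys come from the parser above):
# {"IP_Address_td": "203.0.113.7", "Port_td": "8080", "Code_td": "US",
#  "Country_td": "United States", "Anonymity_td": "elite proxy",
#  "Google_td": "no", "Https_td": "yes", "Last_Checked_td": "1 minute ago"}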
# Pick a random proxy entry and expose it via the module-level conn_info.
def get_random_ip():
    global conn_info
    try:
        with open('proxy_list.json', 'rt') as f:
            conn_info = json.dumps(random.choice(json.load(f)))
    except Exception as e:
        print('proxy_list.json I/O error.', e)
    else:
        return conn_info
# Validate the chosen proxy by requesting our visible IP through it.
def check_proxy():
    global proxy_info
    global result_ip
    connectinfo = json.loads(conn_info)
    proxy_info = connectinfo['IP_Address_td'] + \
        ':' + connectinfo['Port_td']
    result_ip = connectinfo['IP_Address_td']
    opener = build_opener(ProxyHandler({'http': proxy_info}))
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    install_opener(opener)
    try:
        with urlopen('http://ifconfig.co/ip', timeout=1) as n:
            result = (n.read().decode('UTF-8') + ':' +
                      connectinfo['Port_td']).replace('\n', '')
    except Exception as e:
        return 'proxy_invalid'
    else:
        return result
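# Example return values (the address is hypothetical): on success check_proxy()
# returns the IP reported by ifconfig.co plus the port, e.g. '203.0.113.7:8080';
# on any failure it returns the sentinel string 'proxy_invalid'.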
# List every cached proxy entry (currently it only prints them; it does not
# test them).
def valid_all():
    try:
        with open('proxy_list.json', 'rt') as f:
            for ip in json.load(f):
                print(ip)
    except Exception as e:
        print('proxy_list.json I/O error.', e)
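# A minimal sketch (not part of the original script) of how valid_all() could
# be extended to actually test every cached proxy, reusing the same
# ifconfig.co probe that check_proxy() performs:
#
# def valid_all():
#     with open('proxy_list.json', 'rt') as f:
#         for entry in json.load(f):
#             proxy = entry['IP_Address_td'] + ':' + entry['Port_td']
#             opener = build_opener(ProxyHandler({'http': proxy}))
#             opener.addheaders = [('User-agent', 'Mozilla/5.0')]
#             install_opener(opener)
#             try:
#                 with urlopen('http://ifconfig.co/ip', timeout=1) as n:
#                     print(proxy, '->', n.read().decode('UTF-8').strip())
#             except Exception:
#                 print(proxy, '-> invalid')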
if __name__ == '__main__':
    # Refresh the cache when it is missing, older than 5 minutes, or empty.
    if not os.path.exists('proxy_list.json') \
            or time.time() - os.stat('proxy_list.json').st_mtime > 300:
        renew_proxy_info()
    else:
        with open('proxy_list.json', 'rt', encoding='UTF-8') as f:
            if len(json.load(f)) == 0:
                renew_proxy_info()
    get_random_ip()
    final_result = check_proxy()
    # Keep drawing proxies until one answers; drop each dead one from the list.
    while final_result == 'proxy_invalid':
        with open('proxy_list.json', 'rt', encoding='UTF-8') as json_data:
            data = json.load(json_data)
        if len(data) == 0:
            renew_proxy_info()
        else:
            data = [item for item in data
                    if item['IP_Address_td'] != result_ip]
            with open('proxy_list.json',
                      'w+', encoding='UTF-8') as new_proxy_list:
                new_proxy_list.write(json.dumps(data))
            print('DELETE: ', '==> ' + proxy_info)
        get_random_ip()
        final_result = check_proxy()
    with open('proxy_list.json', 'rt') as jsonfile:
        print('Number of data records: ', len(json.load(jsonfile)))
    print('Result: ', final_result)
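# Example console output from a run (all values hypothetical):
#   DELETE:  ==> 203.0.113.7:8080
#   Number of data records:  299
#   Result:  198.51.100.23:3128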