-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspider.py
45 lines (40 loc) · 1.47 KB
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from pyppeteer import launch
import asyncio
from hoshino import R
# Seconds to wait after the page has loaded before taking the screenshot.
# Raise it on slow networks, but do not set it too low.
waittime = 5
# Maximum number of page-load attempts before giving up.
max_retries = 5
# Seconds to wait between page-load retries.
retrytime = 5
# Location the screenshot is written under; taken from hoshino's
# R.img('bwiki') resource (presumably a directory path - confirm against hoshino).
FILE_PATH = R.img('bwiki').path
async def web_connect(page, url):
    """Navigate ``page`` to ``url``, retrying when the load fails.

    Up to ``max_retries`` attempts are made, pausing ``retrytime`` seconds
    between consecutive attempts.

    Args:
        page: Object exposing an awaitable ``goto(url, options)`` method
            (the pyppeteer page created by ``spider`` in this file).
        url: Address to open.

    Returns:
        The same ``page`` object, once navigation has succeeded.

    Raises:
        SystemExit: With exit status 1 when every attempt has failed.
    """
    for attempt in range(1, max_retries + 1):
        try:
            await page.goto(url, {'waitUntil': 'domcontentloaded'})
        except Exception as e:
            # Deliberately broad: this is the retry boundary, and any load
            # failure (timeout, network error, ...) is handled the same way.
            print(f"页面加载失败: {e}, 正在重试")
            # No point sleeping after the last attempt; we give up right away.
            if attempt < max_retries:
                await asyncio.sleep(retrytime)
        else:
            return page

    print("已到达最大重试次数,请检查网络链接并重试。")
    # Raise SystemExit directly instead of calling the site-provided exit()
    # helper, and use a non-zero status so the failure is visible to callers.
    raise SystemExit(1)
async def spider():
    """Save a screenshot of the event-calendar panel of the PCR wiki home page.

    Launches a headless browser through pyppeteer, opens the biligame PCR
    wiki home page via ``web_connect``, waits ``waittime`` seconds, and
    writes a screenshot of the calendar element to ``date_cal.png`` under
    ``FILE_PATH``. The browser is closed even when a step fails.

    Raises:
        IndexError: If the calendar element cannot be found on the page.
        SystemExit: Propagated from ``web_connect`` when the page cannot be
            loaded.
    """
    # Local import keeps this block self-contained; only needed for the path.
    import os

    browser = await launch(
        headless=True,
        defaultViewport={"width": 1600, "height": 1200},
        args=['--disable-popup-blocking', '--window-size=1600,1200'],
    )
    try:
        page = await browser.newPage()
        url = 'https://wiki.biligame.com/pcr/%E9%A6%96%E9%A1%B5'
        page = await web_connect(page, url)
        # Fixed pause after navigation before the element is looked up.
        await asyncio.sleep(waittime)
        # Display scaling affects the capture; taking the screenshot in
        # headless mode is recommended.
        # The XPath selects the grandparent of the div holding the title text.
        elements = await page.xpath('//div[text()="活动日历"]/../..')
        if not elements:
            raise IndexError('event calendar element not found on the page')
        # os.path.join uses the platform separator instead of a literal '\'.
        img = os.path.join(FILE_PATH, 'date_cal.png')
        await elements[0].screenshot({'path': img})
    finally:
        # Always release the browser process, including on errors above.
        await browser.close()
# Script entry point: runs the spider() coroutine to completion as soon as this
# module is executed (there is no __main__ guard, so importing it also runs it).
# NOTE(review): newer Python versions deprecate get_event_loop() when no loop
# is current; confirm the target interpreter before changing this call.
asyncio.get_event_loop().run_until_complete(spider())