-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
73 lines (50 loc) · 1.75 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import requests
import csv
import os
import re
from pyquery import PyQuery as pq
from selenium import webdriver
import img2pdf
# Index page of the series; main() fetches it and scrapes the chapter links.
url = 'https://manhua.dmzj.com/lanqiufeirenquancai/'
# Series name; used as the top-level output directory for the CSV index,
# the per-chapter image folders and the generated PDFs.
name = '灌篮高手'
# A single Chrome instance is started here, at import time, and shared by
# every download_item() call.
option = webdriver.ChromeOptions()
# NOTE(review): presumably reduces Chrome's console log verbosity - confirm.
option.add_argument('log-level=3')
browser = webdriver.Chrome(options=option)
def main():
    """Scrape the series index page, record every chapter and download it.

    Fetches the module-level ``url``, collects all anchors found under
    ``.cartoon_online_border`` and, for each one, writes a
    ``(index, title, link)`` row to ``./<name>/book.csv`` before handing the
    chapter to ``download_item``.

    Raises:
        requests.RequestException: if the index page cannot be fetched.
        OSError: if the output directory or CSV file cannot be created.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    content = requests.get(url, timeout=30).text
    doc = pq(content)

    csv_file = './{0}/book.csv'.format(name)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(os.path.dirname(csv_file), exist_ok=True)

    # The context manager closes the CSV even if a chapter download raises.
    with open(csv_file, 'w', newline='', encoding='utf-8') as book_csv:
        csv_writer = csv.writer(book_csv)
        book_index = 1
        for item in doc('.cartoon_online_border a').items():
            href = item.attr('href')
            if not href:
                # An anchor without an href cannot be turned into a chapter
                # link; concatenating None would raise TypeError.
                continue
            link = 'https://manhua.dmzj.com' + href
            title = item.text()
            csv_writer.writerow((book_index, title, link))
            book_index += 1
            download_item(link, title)
def download_item(item_url, item_name):
    """Download all page images of one chapter and bundle them into a PDF.

    The chapter page is opened in the shared Selenium ``browser`` and the
    page's JavaScript global ``arr_pages`` is read to obtain the image paths.
    Each image is saved under ``./<name>/<item_name>/`` and the saved files
    are then combined into ``./<name>/<item_name>.pdf``.

    Args:
        item_url: URL of the chapter page; also sent as the ``Referer``
            header when fetching the images.
        item_name: Chapter title; used for the image directory name, the PDF
            file name and the PDF title metadata.

    Raises:
        requests.RequestException: if an image request fails, times out or
            returns an HTTP error status.
        OSError: if a directory or file cannot be written.
    """
    browser.get(item_url)
    arr_pages = browser.execute_script("return arr_pages;")

    file_dir = './{0}/{1}'.format(name, item_name)
    # makedirs also creates the series directory when this function is called
    # on its own, and does not fail if the chapter directory already exists.
    os.makedirs(file_dir, exist_ok=True)

    img_arr = []
    for item in arr_pages or []:
        img_url = 'https://images.dmzj.com/' + item
        # Keep only the last path segment as the local file name.
        img_save_file = file_dir + '/' + item.rsplit('/', 1)[-1]
        # NOTE(review): the Referer is presumably required by the image host
        # to allow the download - confirm.
        response = requests.get(
            img_url, headers={"Referer": item_url}, timeout=30
        )
        # Fail loudly instead of saving an HTTP error page as an "image".
        response.raise_for_status()
        # Download before opening the file so a failed request does not
        # leave an empty image file behind.
        with open(img_save_file, 'wb') as p:
            p.write(response.content)
        img_arr.append(img_save_file)

    if not img_arr:
        # Nothing was downloaded, so there is nothing to convert.
        return

    pdf_file = './{0}/{1}.pdf'.format(name, item_name)
    # img2pdf.convert() returns the PDF as bytes; it must be written out
    # explicitly, and the title is passed as keyword metadata rather than as
    # an extra positional "image" argument.
    with open(pdf_file, 'wb') as pdf:
        pdf.write(img2pdf.convert(img_arr, title=item_name))
if __name__ == '__main__':
    try:
        main()
    finally:
        # Always shut down the Chrome instance created at module level, even
        # when scraping fails, so no browser/driver process is left running.
        browser.quit()
    print('all finish')