crawler_empty.py
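"""Skeleton crawler: load the company list, initialise the output files, and run
each company's scraper against its jobs page in a headless Chrome session."""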
import time
from selenium import webdriver
from src.company_list_empty import get_company_list, write_companies
company_list = get_company_list()
print(f'[CRAWLER] Number of companies: {len(company_list)}')
# write out the company list and start with an empty jobs file
write_companies('companies_empty.json')
with open('jobs_empty.json', 'w') as f:
    f.write('{}')
# set up a headless Chrome webdriver (--no-sandbox and --disable-dev-shm-usage
# are commonly needed when running inside containers)
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-extensions')
driver = webdriver.Chrome(options=chrome_options)
current_jobs = {}  # scraped jobs per company (left empty in this skeleton)
n = 1
for company in company_list:
    st = time.time()
    print(f'[CRAWLER] scrape {n} of {len(company_list)}')
    n = n + 1
    # run the company's scraper against its jobs listing page
    jobs_data = company.scraper_type().getJobs(driver, company.jobs_url, company.company_name)
    print('[CRAWLER] Execution time:', (time.time() - st), 'seconds')

driver.quit()  # quit (rather than close) so the chromedriver process also shuts down
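
# --- Illustrative sketch (not part of the original crawler) ---
# The loop above assumes that company.scraper_type() returns a class whose
# instances implement getJobs(driver, jobs_url, company_name). The class below
# is a hypothetical example of that interface; the name, selector, and return
# shape are assumptions for illustration only.
from selenium.webdriver.common.by import By


class ExampleScraper:
    def getJobs(self, driver, jobs_url, company_name):
        # open the company's job listings page
        driver.get(jobs_url)
        # collect visible link texts as a stand-in for real posting extraction
        links = driver.find_elements(By.TAG_NAME, 'a')
        return {company_name: [link.text for link in links if link.text.strip()]}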