-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathIndeed.py
81 lines (62 loc) · 2.49 KB
/
Indeed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#
# Script for scraping job details from Indeed
#
import requests
from bs4 import BeautifulSoup
from utils import sleep_scrapper, get_request_headers
class Indeed:
    """Fetch Indeed job-search result pages and print each listing found.

    The search terms are stored with spaces replaced by ``+`` so they can be
    placed directly into the search URL's query string.
    """

    # Seconds to wait on a single HTTP request before giving up on that page.
    REQUEST_TIMEOUT = 30

    def __init__(self, pos, location):
        """Store the search terms in URL-ready form.

        Args:
            pos: Text used for the ``q`` query parameter of the search URL.
            location: Text used for the ``l`` query parameter.
        """
        self.post = pos.replace(" ", "+")
        self.location = location.replace(" ", "+")

    def run(self):
        """Walk the paginated search results and print every result row.

        Requests the ``start`` offsets 0, 10, ..., 990 in order. The whole run
        stops at the first response whose status code is not 200. Any
        exception raised while handling a page is reported and the loop
        continues with the next offset.
        """
        base_url = ('https://www.indeed.co.in/jobs?q='
                    '%s&l=%s&start=' % (self.post, self.location))
        for start in range(0, 1000, 10):
            url = base_url + str(start)
            try:
                print('[Indeed] :: fetching data from url: %s' % url)
                # Without a timeout one stalled connection blocks the run
                # forever; a timeout surfaces it as an exception instead.
                response = requests.get(url, headers=get_request_headers(),
                                        timeout=self.REQUEST_TIMEOUT)
                if response.status_code != 200:
                    print("[Indeed] :: Failed to "
                          "get content of url: %s" % url)
                    return

                soup = BeautifulSoup(response.content, 'html.parser')
                for div in soup.find_all('div'):
                    # Skip divs that have no class attribute at all.
                    classes = div.attrs.get('class')
                    if not classes:
                        continue
                    # A job entry carries both the 'row' and 'result' classes.
                    if 'row' in classes and 'result' in classes:
                        self.scrap_result_row(div)

                sleep_scrapper('IndeedScraper')
            except Exception as exp:
                # Best-effort boundary: report the failed page and move on.
                print('[IndeedScraper] :: run() :: Got exception : '
                      '%s and fetching data from url: %s' % (exp, url))

    @staticmethod
    def _span_text(div, css_class):
        """Return the stripped text of the first matching span, or ''."""
        span = div.find('span', class_=css_class)
        return span.text.strip() if span else ''

    def scrap_result_row(self, div):
        """Print the fields of one result row.

        Each field is read from a ``<span>`` with a specific class. A span
        that is absent yields an empty value, so one missing field does not
        discard the rest of the row.

        Args:
            div: The element for one result row; it must provide
                ``find(name, class_=...)``.
        """
        try:
            # NOTE(review): this is printed as "title" but is read from the
            # 'company' span -- confirm which of the two is intended.
            title = self._span_text(div, 'company')
            print("[Indeed] :: title: %s" % title)

            location = self._span_text(div, 'location')
            print("[Indeed] :: location: %s" % location)

            sal = self._span_text(div, 'no-wrap')
            print("[Indeed] :: salary: %s" % sal)

            summary = self._span_text(div, 'summary')
            print("[Indeed] :: summery: %s" % summary)
        except Exception as exp:
            # Best-effort boundary: a malformed row must not stop the page.
            print('[Indeed] :: scrap_result_row() :: '
                  'Got exception : %s' % exp)
if __name__ == '__main__':
    # Sample invocation: search for 'java' jobs in 'mohali punjab' and print
    # whatever the scraper finds.
    Indeed('java', 'mohali punjab').run()