-
Notifications
You must be signed in to change notification settings - Fork 1
/
justdialscraper.py
54 lines (45 loc) · 1.78 KB
/
justdialscraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# -*- coding: utf-8 -*-
import csv
import json
import requests
from bs4 import BeautifulSoup
print(25*"=")
print("Just Dial Scraper")
print(25*"=")
url = 'http://www.justdial.com/functions/ajxsearch.php?national_search=0&act'\
'=pagination&city={0}&search={1}&page={2}'
what = raw_input("Enter your Query: ")
what = what.replace(' ', '+')
where = raw_input("Enter the Location: ")
with open(what+"_"+where+'.csv', 'w') as f:
f.write('company, address, phone\n')
page = 1
while True:
print('Scraping Page', page)
resp = requests.get(url.format(where, what, page))
if not resp.json()['paidDocIds']:
print(25*"-")
print('Scraping Finished')
print(25*"-")
break
markup = resp.json()['markup'].replace('\/', '/')
soup = BeautifulSoup(markup, 'html.parser')
for thing in soup.find_all('section'):
csv_list = []
if thing.get('class') == ['jcar']:
# Company name
for a_tag in thing.find_all('a'):
if a_tag.get('onclick') == "_ct('clntnm', 'lspg');":
csv_list.append(a_tag.get('title'))
# Address
for span_tag in thing.find_all('span'):
if span_tag.get('class') == ['mrehover', 'dn']:
csv_list.append(span_tag.get_text().strip())
# Phone number
for a_tag in thing.find_all('a'):
if a_tag.get('href').startswith('tel:'):
csv_list.append(a_tag.get('href').split(':')[-1])
csv_list = ['"'+item+'"' for item in csv_list]
writeline = ','.join(csv_list)+'\n'
f.write(','.join(csv_list)+'\n')
page += 1