forked from h4ck3rk3y/ScrapeCommerce
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheby.py
95 lines (92 loc) · 3.06 KB
/
eby.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import re
import sys
import requests
import socket
from struct import *
from fuzzywuzzy import fuzz
# Process-wide default timeout applied to newly created sockets. The value is
# in seconds, so 10000 is roughly 2h47m.
# NOTE(review): looks far longer than intended for a scraper - confirm.
socket.setdefaulttimeout(10000)
# Python 2 only: `reload` is a builtin there. Re-importing `sys` is the usual
# way to get `setdefaultencoding` back, since it is normally removed from
# `sys` at interpreter start-up.
reload(sys)
# Make implicit str <-> unicode conversions use UTF-8 instead of ASCII, which
# is what lets `str(...)` on scraped titles in this file accept non-ASCII text.
sys.setdefaultencoding("utf-8")
def check(name, picture, desc, price, rating, warranty):
    """Turn the regex matches for one listing into a list of plain values.

    Each argument is either a match object whose first capture group holds
    the scraped value, or ``None`` when the pattern was not found on the page.

    Returns a six-item list in the fixed order
    ``[name, picture, desc, price, rating, warranty]``. A missing field is
    replaced by a human-readable placeholder string.
    """
    # (match, placeholder) pairs, in output order. ``name`` used to be
    # dereferenced unconditionally and raised AttributeError when it was
    # None; it now gets a placeholder like every other field.
    fields = (
        (name, 'Name Not Available'),
        (picture, 'Picture Not Available'),
        (desc, 'Description not Available'),
        (price, 'No Price Value'),
        (rating, "Review Not Available"),
        (warranty, "Warranty Not Available"),
    )
    return [match.group(1) if match else fallback for match, fallback in fields]
def visitPhones(phone_url, phone_list):
    """Scrape one listing page and file the result into ``phone_list``.

    ``phone_list`` is a list of dicts mapping a listing title to the
    six-item list produced by ``check``. Titles that look alike are grouped
    in the same dict: the new listing joins the first dict that already holds
    a title with a fuzzy-match ratio of at least 60 against it; otherwise a
    new single-entry dict is appended.

    Returns the (mutated) ``phone_list`` on success, or ``False`` when the
    page could not be fetched or no title was found on it.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0'
    }
    # The headers were built but never sent; pass them so this request is
    # made the same way as the search requests in ``action``.
    response = requests.get(phone_url, headers=headers)
    if response.status_code != 200:
        return False

    page = response.text
    # Raw strings keep the regex escapes (\/ and \W) out of Python's own
    # string-escape processing; the patterns themselves are unchanged.
    price = re.search(r'<span class="notranslate" id="prcIsum" itemprop="price" style="">(.*)</span>', page)
    picture = re.search(r"image.src= '(.*)';", page)
    name = re.search(r'<span id="vi-lkhdr-itmTitl" class="u-dspn">(.*)</span>', page)
    desc = re.search(r'features: <\/td>\W*<td width="50.0%">\W*<span>(.*)<\/span>', page)
    rating = re.search(r'<div id="si-fb" >(.*)% Positive feedback</div>', page)
    warranty = re.search(r'Warranty: <\/td>\W*<td width="50.0%">\W*<span>(.*)<\/span>', page)
    if not name:
        # Without a title there is no key to store the listing under.
        return False

    title = name.group(1)
    details = check(name, picture, desc, price, rating, warranty)

    # Write straight into the dict being scanned instead of keeping a
    # hand-maintained index in step with the loop.
    for group in phone_list:
        if any(fuzz.ratio(known_title, title) >= 60 for known_title in group):
            group[str(title)] = details
            return phone_list

    phone_list.append({str(title): details})
    return phone_list
def action(brand, keyword):
    """Search ebay.in mobile phones for ``brand``/``keyword`` and scrape every hit.

    The first results page is fetched to read the total result count, then
    every results page (200 listings per page) is fetched in turn and each
    listing link on it is handed to ``visitPhones``. The collected
    ``phone_list`` is printed at the end.

    ``brand`` and ``keyword`` are inserted into the query string as given.

    Returns ``False`` when the result count cannot be found on the first
    page, and ``None`` otherwise (including when the first request fails).
    """
    phone_list = []
    headers = {
        'User-Agent': 'Mozilla/5.0'
    }
    page_size = 200  # matches the _skc=200 query parameter below

    def page_url(page_number):
        # Single place that builds the search URL for a given results page.
        return ('http://www.ebay.in/sch/Mobile-Phones-/15032/i.html?_from=R40&LH_AllListings=1&Brand='
                + brand + '&_sop=12&_nkw=' + keyword + '&_pgn=' + str(page_number) + '&_skc=200&rt=nc')

    response = requests.get(page_url(1), headers=headers)
    if response.status_code != 200:
        # This message was a bare string expression and never shown.
        print('Bad Connectivity!')
        return None

    rcnt = re.search('<span class="rcnt" >(.*)</span>', response.text)
    if not rcnt:
        return False
    total_results = int(rcnt.group(1).replace(',', ''))

    # Round up so a partially filled last page is visited too. The old bound,
    # int(rcnt / 200.0) used as an exclusive stop, skipped the final page and
    # processed nothing at all for fewer than 400 results.
    page_count = (total_results + page_size - 1) // page_size

    # Compiled once instead of on every page.
    link_pattern = re.compile(r'<h3 class="lvtitle"><a href="(.*)"\W*class')

    for page_number in range(1, page_count + 1):
        print('On Page ' + str(page_number) + ' of Results, processing 200 phones.')
        response = requests.get(page_url(page_number), headers=headers)
        if response.status_code != 200:
            # Also a silent bare string before; report it and move on.
            print('Something is not right!')
            continue
        links = link_pattern.findall(response.text)
        if not links:
            print('Something is not right')
            continue
        # Separate name from the page counter, which the old inner loop shadowed.
        for phone_url in links:
            visitPhones(phone_url, phone_list)

    print(phone_list)
def ebay():
    """Ask for a brand name and a keyword on stdin, then run the search."""
    prompts = ("Enter Brand Name:\n", "Enter Keyword:\n")
    # Prompts are asked in order: brand first, then keyword.
    answers = [raw_input(prompt) for prompt in prompts]
    action(*answers)