zillow.py
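# Scrape Zillow "for sale" listings for a user-supplied ZIP code or location
# and write the results to a CSV file.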
import requests
import json
from bs4 import BeautifulSoup
import pandas as pd
zip_or_location = input("Please input a ZIP code or location: ")
csv_data = []
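# Browser-like request headers so Zillow is less likely to reject the request as a bot.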
headers = {'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
           'accept-encoding': 'gzip, deflate, sdch, br',
           'accept-language': 'en-GB,en;q=0.8,en-US;q=0.6,ml;q=0.4',
           'cache-control': 'max-age=0',
           'upgrade-insecure-requests': '1',
           'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}
url = f"https://www.zillow.com/homes/for_sale/{zip_or_location}/1_p/?fromHomePage=true&shouldFireSellPageImplicitClaimGA=false&fromHomePageTab=buy"
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.content, "lxml")
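# The pagination element's text ends in "of <total pages>"; grab that number.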
pages = soup.find_all("li", {"aria-current":"page"})[-1].text.split("of ")[-1]
print(f'Found {pages} pages')
for i in range(int(pages)):
    print(f'Working on {i + 1} of {pages} pages')
    url = f"https://www.zillow.com/homes/for_sale/{zip_or_location}/{i+1}_p/?fromHomePage=true&shouldFireSellPageImplicitClaimGA=false&fromHomePageTab=buy"
    r = requests.get(url, headers=headers)
    print(url)
    soup = BeautifulSoup(r.content, "lxml")
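    # Zillow embeds the search results as JSON wrapped in an HTML comment (<!--{...}-->)
    # inside this script tag; splitting on "--" pulls out the JSON payload.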
    j = soup.find("script", {"data-zrr-shared-data-key": "mobileSearchPageStore"})
    if j is None:
        print(f'No embedded search data on page {i + 1}; skipping')
        continue
    j_data = str(j).split("--")[1]
    w_json = json.loads(j_data)
    search_results = w_json.get('cat1', {}).get('searchResults', {}).get('listResults', [])
    for listing in search_results:
        address = listing.get('address')
        # hdpData/homeInfo can be absent on some cards, so fall back to an empty dict
        property_info = listing.get('hdpData', {}).get('homeInfo') or {}
        city = property_info.get('city')
        state = property_info.get('state')
        postal_code = property_info.get('zipcode')
        price = listing.get('price')
        bedrooms = listing.get('beds')
        bathrooms = listing.get('baths')
        area = listing.get('area')
        info = f'{bedrooms} bds, {bathrooms} ba, {area} sqft'
        broker = listing.get('brokerName')
        property_url = listing.get('detailUrl')
        title = listing.get('statusText')
        data = {'address': address,
                'city': city,
                'state': state,
                'postal_code': postal_code,
                'price': price,
                'facts and features': info,
                'real estate provider': broker,
                'url': property_url,
                'title': title}
        csv_data.append(data)
df = pd.DataFrame(csv_data)
df.to_csv(f"output {zip_or_location}.csv", index=False)
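
# Example session (values are illustrative):
#   $ python zillow.py
#   Please input a ZIP code or location: 10001
#   Found 5 pages
#   Working on 1 of 5 pages
#   https://www.zillow.com/homes/for_sale/10001/1_p/?...
#   ...
# The results land in "output 10001.csv".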