forked from niccdias/scrapers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbroward_county_properties.py
68 lines (50 loc) · 2.45 KB
/
broward_county_properties.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from os import getcwd
# Captures whatever follows the last '=' in a string: the leading '.*' is
# greedy, so it backtracks only as far as the final '='.
_CFN_PATTERN = re.compile(r'.*=(.*)')


def _extract_cfn(cfn_path):
    """Return the portion of *cfn_path* that follows its last '='.

    Args:
        cfn_path: A value from the 'cfn_path' column (presumably a URL or
            path ending in '...=<CFN>' -- TODO confirm against the CSV).

    Returns:
        The text captured after the last '=' (may be an empty string).

    Raises:
        ValueError: If *cfn_path* contains no '=' at all, so no CFN can be
            extracted from it.
    """
    match = _CFN_PATTERN.match(cfn_path)
    if match is None:
        # Fail with the offending value instead of an opaque
        # "'NoneType' object has no attribute ..." error.
        raise ValueError('No CFN found in cfn_path: {!r}'.format(cfn_path))
    return match.group(1)


# Load the property table; the first row of the CSV is the header.
property_df = pd.read_csv('broward_properties.csv', header=0)

# One CFN per row, in the same order as the rows of property_df.
cfn_paths = list(property_df['cfn_path'])
cfns = [_extract_cfn(cfn_path) for cfn_path in cfn_paths]
# Preferences applied to the Firefox profile before the browser starts.
# Judging by their names they are meant to make PDF links download silently
# into the current working directory (no prompts, no download-manager window,
# no built-in PDF viewer) -- the exact effect depends on the Firefox version.
# Each preference is listed exactly once.
_FIREFOX_PREFERENCES = {
    # NOTE(review): 2 is understood to select the custom directory given by
    # 'browser.download.dir' -- confirm against the Firefox preference docs.
    'browser.download.folderList': 2,
    'browser.download.dir': getcwd(),
    'browser.download.useDownloadDir': True,
    'browser.download.manager.showWhenStarting': False,
    'browser.download.manager.focusWhenStarting': False,
    'browser.download.manager.alertOnEXEOpen': False,
    'browser.download.manager.closeWhenDone': True,
    'browser.download.manager.showAlertOnComplete': False,
    'browser.download.manager.useWindow': False,
    'browser.helperApps.neverAsk.openFile': 'application/pdf,application/x-pdf',
    'browser.helperApps.neverAsk.saveToDisk': 'application/pdf,application/x-pdf',
    'browser.helperApps.alwaysAsk.force': False,
    'services.sync.prefs.sync.browser.download.manager.showWhenStarting': False,
    'pdfjs.disabled': True,
}

profile = webdriver.FirefoxProfile()
for pref_name, pref_value in _FIREFOX_PREFERENCES.items():
    profile.set_preference(pref_name, pref_value)

# Launch Firefox with the customised profile; this browser instance is the
# one driven by the rest of the script.
browser = webdriver.Firefox(profile)
# For every CFN: open its detail page, record the grantor/grantee names, and
# click the document link. The two lists stay in the same order as `cfns`, so
# they can be attached to property_df as new columns afterwards.
grantor_list = []
grantee_list = []
for cfn in cfns:
    URL = 'https://officialrecords.broward.org/oncorev2/ShowDetails.aspx?CFN=' + cfn
    print('Getting ' + URL)
    browser.get(URL)

    # The name labels are inside the "contents" frame. Their text can span
    # several lines, so newlines are replaced with ';' to keep each record in
    # a single CSV cell.
    browser.switch_to.frame(browser.find_element_by_name("contents"))
    grantors = browser.find_element_by_id('lblDirectName').text
    grantor_list.append(grantors.replace('\n', ";"))
    grantees = browser.find_element_by_id('lblReverseName').text
    grantee_list.append(grantees.replace('\n', ";"))

    # Leave the "contents" frame and enter the sibling "doc" frame, then click
    # its first link (presumably the PDF download link -- TODO confirm).
    browser.switch_to.default_content()
    browser.switch_to.frame(browser.find_element_by_name("doc"))
    browser.find_element_by_tag_name('a').click()

    # Fixed pause after the click before moving to the next record
    # (presumably to let the download start -- TODO confirm it is enough).
    sleep(2)

# NOTE(review): the browser is left open, as before; quitting it here could
# interrupt downloads still in flight -- confirm before adding browser.quit().

property_df['grantors'] = grantor_list
property_df['grantees'] = grantee_list

# This overwrites the same file the script reads its input from. index=False
# keeps the DataFrame's row index out of the file; otherwise every run would
# add another unnamed index column to the CSV.
property_df.to_csv('broward_properties.csv', index=False)