-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape_sale.py
77 lines (64 loc) · 2.05 KB
/
scrape_sale.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import urllib2
import sqlite3
import string
from bs4 import BeautifulSoup
from byteify import byteify
def scrape_sale(sale):
    """Scrape one sale's catalog page and upsert its items into fta.sqlite.

    sale -- URL of the sale's "mnprint" catalog listing page.

    Parses the DataTable rows for item id, description, additional info and
    item location, derives the sale-closing time from the page header, and
    bulk-inserts everything into the ``items`` table. Duplicate itemIds
    replace the stored row (UNIQUE(itemId) ON CONFLICT REPLACE).
    """
    page = urllib2.urlopen(sale)
    try:
        soup = BeautifulSoup(page, 'lxml')
    finally:
        page.close()  # don't leak the HTTP connection

    catalog = soup.find('table', {'id': 'DataTable'})

    # Closing time (e.g. "June 10, 2017 2:15 PM EST") is the tail of the
    # centered header line, formatted "<sale title> - <closing time>".
    header = soup.find('div', id='wrapper').find('p', align='center')
    title = header.findAll(text=True)[1]
    timeout = byteify(title.split(' - ')[-1])

    # Per-item detail pages live under the "mnlist" variant of the catalog
    # URL. This is loop-invariant, so compute it once (was recomputed per
    # row via the deprecated string.replace()).
    itemPage = sale.replace("mnprint", "mnlist")

    args = []
    for row in catalog.findAll("tr")[1:]:  # row [0] is the table header
        cells = row.findAll('td')
        itemId = cells[0].find(text=True).strip(".")
        link = "{}/{}".format(itemPage, itemId)
        description = None
        info = None
        location = None
        for elem in cells[1].findAll('b'):
            tag = elem.find(text=True)
            content = elem.next_sibling.strip()
            if not content:
                continue
            if content[0] == ":":
                # Labels render as "<b>Label</b>: value" -- drop the ": ".
                content = content[2:]
            if "Description" in tag:
                description = content
            elif "Additional Info" in tag:
                info = content
            elif "Item Location" in tag:
                location = content
            if description and info and location:
                break  # all three fields found; skip remaining tags
        args.append((
            location,
            timeout,
            sale,
            itemId,
            link,
            info,
            description))

    conn = sqlite3.connect("fta.sqlite")
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS items (\
            location text, \
            timeout text, \
            sale text, \
            itemId text, \
            link text, \
            info text, \
            description text, \
            UNIQUE(itemId) ON CONFLICT REPLACE)")
        cursor.executemany(
            "INSERT INTO items(location, timeout, sale, itemId, \
            link, info, description) \
            VALUES(?,?,?,?,?,?,?)", args)
        conn.commit()
    finally:
        conn.close()  # release the DB handle even if parsing/insert fails