-
Notifications
You must be signed in to change notification settings - Fork 1
/
race_io.py
124 lines (109 loc) · 3.64 KB
/
race_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import json
import util
import pprint
import requests
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from urllib.request import urlopen
from util import make_list
from util import print_table
pp = pprint.PrettyPrinter()
INDEX_MIN = 2
def get_betinfo(filename):
    """Load bet information from a JSON file.

    Args:
        filename: Path to a JSON file containing bet data.

    Returns:
        The parsed JSON object (whatever structure the file holds).
    """
    # Use a context manager so the file handle is closed promptly;
    # the original `json.load(open(...))` leaked the handle until GC.
    with open(filename, 'r') as f:
        return json.load(f)
def get_raceinfo(url):
    """Scrape basic race information from a race page.

    Args:
        url: URL of the race page to load (JS-rendered, so a real
            browser via Selenium is used).

    Returns:
        dict with keys:
            'course': space-joined text of the middle <p> elements,
            'place': text of the <span> inside the first <p>,
            'ddy_index': second line of the f_ffChinese <p> text.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        time.sleep(3)  # crude wait for the JS-rendered content
        # driver.implicitly_wait(3)
        soup = BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # Always close the browser, even if loading/parsing raises,
        # so failed calls do not leak Chrome processes.
        driver.quit()
    info_left = soup.find('div', class_='info').find_all('div')[0]
    paragraphs = info_left.find_all('p')
    # The middle <p> elements (all but first and last) form the course text.
    course = ' '.join(p.get_text() for p in paragraphs[1:-1])
    place = paragraphs[0].find('span').get_text()
    # Strip spaces, then take the second newline-separated line.
    ddy_index = info_left.find('p', class_='f_ffChinese').get_text().replace(" ", "").split("\n")[1]
    return {
        'course': course,
        'place': place,
        'ddy_index': ddy_index
    }
def get_results(url):
    """Scrape a race-results page into structured tables.

    Reloads the page until at least INDEX_MIN tables are present
    (the page is JS-rendered and may come back incomplete).

    Args:
        url: URL of the results page.

    Returns:
        Tuple (race_info, table_results, table_awards):
            race_info: dict with 'tag', 'name', 'cond', 'track' from
                the second table's cells.
            table_results: rows of the finishing table (header "名次"),
                filtered to rows with more than 10 cells.
            table_awards: list of [award_name, [(combination, payout), ...]]
                pairs from the payout table (header "派彩").
    """
    tables = []
    while len(tables) < INDEX_MIN:
        driver = webdriver.Chrome()
        try:
            driver.get(url)
            time.sleep(3)  # crude wait for the JS-rendered tables
            # driver.implicitly_wait(3)
            soup = BeautifulSoup(driver.page_source, 'lxml')
            tables = soup.find_all('table')
        finally:
            # Quit inside finally so a failed attempt does not leak a
            # Chrome process on each retry iteration.
            driver.quit()
    # Race metadata lives in the second table's cells.
    info_panel = tables[1]
    info = [td.get_text() for td in info_panel.find_all('td')]
    race_info = {
        'tag': info[6],
        'name': info[9],
        'cond': info[7] + ' ' + info[8],
        'track': info[10] + ' ' + info[11]
    }
    # -------------------------
    # input and process results
    # -------------------------
    # Scan forward from INDEX_MIN to find the table whose header cell
    # is "名次" (finishing position).
    index = INDEX_MIN
    table_results = make_list(tables[INDEX_MIN])
    while len(table_results) == 0 or len(table_results[0]) == 0 or table_results[0][0] != "名次":
        index = index + 1
        table_results = make_list(tables[index])
    # Keep only rows wide enough to be real result rows.
    table_results = list(filter(lambda x: len(x) > 10, table_results))
    # (A former per-row loop here reassigned each row to itself — a
    # no-op; removed. Its comment suggested section positions were to
    # be joined into one slot, which was never implemented.)
    # -----------------------------
    # input and process award rates
    # -----------------------------
    # Same scan, this time for the payout table headed "派彩".
    index = INDEX_MIN
    table_awards = make_list(tables[INDEX_MIN])
    while len(table_awards) == 0 or len(table_awards[0]) == 0 or table_awards[0][0] != "派彩":
        index = index + 1
        table_awards = make_list(tables[index])
    # Drop the header row.
    table_awards = table_awards[1:]
    # Even-length rows are continuations of the award above them:
    # walk bottom-up and merge each into its predecessor.
    for i, row in reversed(list(enumerate(table_awards))):
        if i == 0:
            continue
        if util.is_even(len(row)):
            table_awards[i - 1] += row
    # Keep odd-length (merged) rows; pair the remaining cells into
    # (combination, payout) tuples after the award name.
    table_awards = [
        [row[0], list(zip(row[1::2], row[2::2]))]
        for row in table_awards
        if not util.is_even(len(row))
    ]
    # print_table(table_awards)
    return race_info, table_results, table_awards
def get_racecard(url):
    """Scrape the racecard table from a race page.

    Reloads the page until at least INDEX_MIN tables are present
    (the page is JS-rendered and may come back incomplete).

    Args:
        url: URL of the racecard page.

    Returns:
        The racecard table (the page's ninth <table>) converted to a
        list of rows via make_list.
    """
    tables = []
    while len(tables) < INDEX_MIN:
        driver = webdriver.Chrome()
        try:
            driver.get(url)
            time.sleep(3)  # crude wait for the JS-rendered tables
            # driver.implicitly_wait(3)
            soup = BeautifulSoup(driver.page_source, 'lxml')
            tables = soup.find_all('table')
        finally:
            # Quit inside finally so a failed attempt does not leak a
            # Chrome process on each retry iteration.
            driver.quit()
    # NOTE(review): index 8 is hard-coded for the current page layout —
    # verify if the site's markup changes.
    table_racecard = make_list(tables[8])
    return table_racecard