ETF.py
import pandas as pd
import requests
from bs4 import BeautifulSoup


class ETF(object):
    """Scrapes an ETF profile page and exposes its breakdown tables as DataFrames."""

    top_ten: pd.DataFrame
    sector: pd.DataFrame
    currency: pd.DataFrame
    country: pd.DataFrame
    name: str

    def __init__(self, url, name):
        headers = {
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Methods': 'GET',
            'Access-Control-Allow-Headers': 'Content-Type',
        }
        # Pass the headers as request headers; the original call passed them
        # positionally, which requests interprets as query parameters.
        req = requests.get(url, headers=headers)
        soup = BeautifulSoup(req.content, 'html.parser')
        self.name = name
        self.top_ten = self._extract_table(soup, "topTenIndex")
        self.sector = self._load_sector(soup, name)
        self.currency = self._load_currency(soup, name)
        self.country = self._load_country(soup, name)

    @staticmethod
    def _load_currency(soup, name):
        # Currency breakdown: drop the spare third column, strip the "%" sign,
        # and convert the decimal comma so the weights become numeric.
        df = ETF._extract_table(soup, "breakdown-scrollable-box-index-currencies")
        df = df.drop(2, axis=1)
        df.columns = ["Currency", name]
        df = df.set_index("Currency")
        df = df.replace(regex=r'%', value="")
        df = df.replace(regex=r',', value=".")
        df[name] = pd.to_numeric(df[name])
        return df

    @staticmethod
    def _load_sector(soup, name):
        # Sector breakdown, cleaned the same way as the currency table.
        df = ETF._extract_table(soup, "breakdown-scrollable-box-index-sectors")
        df = df.drop(2, axis=1)
        df.columns = ["Sector", name]
        df = df.set_index("Sector")
        df = df.replace(regex=r'%', value="")
        df = df.replace(regex=r',', value=".")
        df[name] = pd.to_numeric(df[name])
        return df

    @staticmethod
    def _load_country(soup, name):
        # Country breakdown; this table has only two columns, so nothing is dropped.
        df = ETF._extract_table(soup, "breakdown-scrollable-box-index-countries")
        df.columns = ["Country", name]
        df = df.set_index("Country")
        df = df.replace(regex=r'%', value="")
        df = df.replace(regex=r',', value=".")
        df[name] = pd.to_numeric(df[name])
        return df

    @staticmethod
    def _extract_table(soup, index) -> pd.DataFrame:
        # Locate the single <div> with the given id and parse the table inside it.
        div = soup.find_all("div", {"id": index})
        if len(div) == 1:
            df_div = pd.read_html(str(div[0]))
            return df_div[0]
        raise RuntimeError("extract table failed. Table not found or more than one.")
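

# --- Usage sketch. Assumptions: the page at `url` contains the div ids used
# above ("topTenIndex", "breakdown-scrollable-box-index-*"); the URL and ETF
# name below are placeholders, not verified values from the source. ---
if __name__ == "__main__":
    etf = ETF("https://example.com/etf-profile/some-etf", "Example ETF")
    print(etf.top_ten)            # top ten holdings table
    print(etf.sector.head())      # sector weights in percent
    print(etf.currency.head())    # currency weights in percent
    print(etf.country.head())     # country weights in percent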