-
Notifications
You must be signed in to change notification settings - Fork 0
/
firds.py
179 lines (141 loc) · 6.09 KB
/
firds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""A few functions to download and access data from ESMA's Financial
Instrument Reference Data System (FIRDS).
"""
import os
import sqlite3 as sql
from datetime import datetime, timedelta
from zipfile import ZipFile
from io import BytesIO
from typing import Dict, Optional, List, Iterable
import requests
from lxml import etree
### The functions below are helper functions to download and extract FIRDS XML data.
FIRST_FIRDS_DATE = datetime(2017, 10, 15) # Apparently the earliest date on which there are any FIRDS files.
DATA_DIR = 'data_files'
Q_URL = ('https://registers.esma.europa.eu/solr/esma_registers_firds_files/'
'select?q=*&fq=publication_date:%5B{from_year}-{from_month}-'
'{from_day}T00:00:00Z+TO+{to_year}-{to_month}-{to_day}T23:59:59Z%5D'
'&wt=xml&indent=true&start={start}&rows={rows}')
FNAME_START = 'FULINS_{}'
def _request_file_urls(from_date: datetime, to_date: datetime, start, rows) -> etree._Element:
"""Request data on FIRDS files (including download URLs) for the
given timeframe and return the XML object.
"""
url = Q_URL.format(
from_year=from_date.year,
from_month=from_date.month,
from_day=from_date.day,
to_year=to_date.year,
to_month=to_date.month,
to_day=to_date.day,
start=start,
rows=rows
)
response = requests.get(url)
return etree.fromstring(response.content)
def _parse_file_urls(root: etree._Element, ftype: str = '') -> List[str]:
"""Takes an XML element with FIRDS file data (such as that returned
by _request_file_urls) and returns download URLs for all relevant
files.
`ftype`, if given, should be the first letter of the CFI code for
the relevant security type (eg, 'B' for bonds), or a string of such
letters (eg, "BE" for bonds and equities). In that case only URLs
for files of the specified type(s) will be included.
"""
urls = []
for entry in root[1]:
fname = entry.xpath('.//str[@name="file_name"]')[0].text
if (not ftype) or any(fname.startswith(FNAME_START.format(f)) for f in ftype):
url = entry.xpath('.//str[@name="download_link"]')[0].text
urls.append(url)
return urls
def get_file_urls(from_date: datetime = None, to_date: datetime = None, ftype: str = '') -> List[str]:
"""Get the download URLs for all FIRDS files within the given
timeframe, for the given security type (if provided).
"""
start = 0
rows = 100
if from_date is None:
to_date = datetime.today()
from_date = to_date - timedelta(weeks=1)
elif to_date is None:
to_date = from_date
root = _request_file_urls(from_date, to_date, start, rows)
num_results = int(root[1].attrib['numFound'])
urls = _parse_file_urls(root, ftype)
while num_results > (start + rows):
start += rows
root = _request_file_urls(from_date, to_date, start, rows)
urls += _parse_file_urls(root, ftype)
return urls
def download_zipped_file(url: str, to_dir: str) -> str:
"""Download a zip file from `url` and extract the contents to `to_dir`.
returns the path to the extracted file (this assumes there is only one
file in the zip archive).
"""
if not os.path.exists(to_dir):
os.makedirs(to_dir)
response = requests.get(url)
response.raise_for_status()
zipfile = ZipFile(BytesIO(response.content))
name = zipfile.namelist()[0]
zipfile.extractall(path=to_dir)
return os.path.join(to_dir, name)
def get_xml_files(ftype: str = '',
data_dir: str = '.',
from_date: Optional[datetime] = None,
to_date: Optional[datetime] = None,
force_dl: bool = False) -> List[str]:
"""Download and extract all FIRDS XML files for the given security
type and timeframe. Returns a list of filepaths.
"""
if not os.path.exists(data_dir):
os.makedirs(data_dir)
urls = get_file_urls(from_date, to_date, ftype=ftype)
fpaths = [download_zipped_file(url, data_dir) for url in urls]
return fpaths
### The following functions are used create an ISIN->LEI lookup table based on the FIRDS data.
DEFAULT_DB_FILE = 'firds.db'
ns = {
'BizData': 'urn:iso:std:iso:20022:tech:xsd:head.003.001.01',
'AppHdr': 'urn:iso:std:iso:20022:tech:xsd:head.001.001.01',
'Document': 'urn:iso:std:iso:20022:tech:xsd:auth.017.001.02'
}
CREATE_TABLE = """CREATE TABLE IF NOT EXISTS \"{table}\" (
isin TEXT PRIMARY KEY,
lei TEXT NOT NULL
)"""
def append_to_table(table: str, xml_file: str, db_file: str = DEFAULT_DB_FILE):
"""Create a table with name `table`, if it does not already exist,
and insert ISIN/LEI pairs for each security described in `xml_file`.
"""
conn = sql.connect(db_file)
cursor = conn.cursor()
cursor.execute(CREATE_TABLE.format(table=table))
tree = etree.parse(xml_file)
root = tree.getroot()
args = []
#print(root.xpath('Document:RefData', namespaces=ns))
for ref_data in root.xpath('//Document:RefData', namespaces=ns):
isin = ref_data.xpath('Document:FinInstrmGnlAttrbts/Document:Id', namespaces=ns)[0].text
lei = ref_data.xpath('Document:Issr', namespaces=ns)[0].text
#print(isin, lei)
#cursor.execute(f'INSERT OR IGNORE INTO "{table}" (isin, lei) VALUES (?, ?)', (isin, lei))
args.append((isin, lei))
cursor.executemany(f'INSERT OR IGNORE INTO "{table}" (isin, lei) VALUES (?, ?)', args)
conn.commit()
def lookup_leis(isins: Iterable[str], table: str, db_file: str = DEFAULT_DB_FILE) -> List[str]:
"""Looks up each ISIN code in `isins` in `table` to find its
corresponding issuer LEI. Returns a list of LEIs in the same
order as `isins`. Where an LEI is not found, None appears at
the relevant index of the list.
"""
conn = sql.connect(db_file)
cursor = conn.cursor()
results = []
for i in isins:
cursor.execute(f'SELECT lei FROM "{table}" WHERE isin=?', (i,))
results.append(cursor.fetchone()[0])
return results