#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 1 16:39:02 2021
@author: Rajesh Rao
This script works to format economic release data from Bloomberg, as stored
in an excel file. Note, API users need not worry about this.
"""
##########################################################################
# PACKAGE IMPORTS
##########################################################################
import os
import time
import numpy as np
import pandas as pd
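##########################################################################
# ASSUMED WORKBOOK LAYOUT
##########################################################################
# Inferred from the parsing logic below; verify against your own
# ECO_RELEASES.xlsx export before running:
#   - one sheet per economic release, named by its Bloomberg ticker
#   - cell B1 of each sheet holds the release name (df.iloc[0].iloc[1])
#   - spreadsheet row 5 (df.iloc[4]) holds the column headers
#   - rows 6 onward (df.iloc[5:]) hold the release observations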
# %% Economic Data Numeric Conversion
print('\nParsing Bloomberg Economic Data file...\n')
start = time.time()
# create an ExcelFile object to enumerate the workbook's sheets, then read them sequentially
xl = pd.ExcelFile(os.path.join(os.getcwd(), 'Input', 'ECO_RELEASES.xlsx'))
res = xl.sheet_names
# accumulator for the cleaned per-sheet dataframes
concat_list = []
# iterate through each of the Excel sheets, concatenating the dataframes
for pg in range(len(res)):
    # read the dataframe from the economic-release sheet
    df = xl.parse(sheet_name=pg)
    # construct a proper table from each sheet: spreadsheet row 5 holds the
    # header, the rows below it hold the data (.copy() avoids mutating a view)
    export_df = df.iloc[5:].copy()
    export_df.columns = df.iloc[4]
    # remove all rows whose RELEASE_DATE is missing (NaT after conversion)
    export_df = export_df[pd.to_datetime(export_df.RELEASE_DATE).notna()]
    # add the release name and its corresponding TICKER (the sheet name)
    n = export_df.shape[0]
    export_df['TICKER'] = [res[pg]] * n
    export_df['NAME'] = [df.iloc[0].iloc[1]] * n
    concat_list.append(export_df)
    print('\tSheet %d, extracted %s data' % (pg + 1, df.iloc[0].iloc[1]))
# concatenate all of the dataframes (all macroeconomic events)
concat_pd = pd.concat(concat_list)
# back-fill the Bloomberg relevance indicator (a measure of how widely the release is followed)
concat_pd['RELEVANCE_VALUE'] = concat_pd['RELEVANCE_VALUE'].bfill()
concat_pd = concat_pd.sort_values(by='RELEASE_DATE')
concat_pd = concat_pd[['RELEASE_DATE', 'TICKER', 'NAME', 'ACTUAL_RELEASE', 'ECO_RELEASE_DT',
                       'BN_SURVEY_MEDIAN', 'BN_SURVEY_AVERAGE', 'BN_SURVEY_HIGH', 'BN_SURVEY_LOW',
                       'FORECAST_STANDARD_DEVIATION', 'BN_SURVEY_NUMBER_OBSERVATIONS',
                       'RELEVANCE_VALUE']]
# replace zero forecast standard deviations with NaN to avoid a division-by-zero error
concat_pd['FORECAST_STANDARD_DEVIATION'] = concat_pd['FORECAST_STANDARD_DEVIATION'].replace({
    0: np.nan})
# construct economic forecast surprises and z-score measures
concat_pd['SURPRISES'] = concat_pd['ACTUAL_RELEASE'] - concat_pd['BN_SURVEY_MEDIAN']
concat_pd['ZSCORE'] = concat_pd['SURPRISES'] / concat_pd['FORECAST_STANDARD_DEVIATION']
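# e.g. with hypothetical numbers: ACTUAL_RELEASE = 429, BN_SURVEY_MEDIAN = 425
# and FORECAST_STANDARD_DEVIATION = 2 give SURPRISES = 4 and ZSCORE = +2.0,
# i.e. the release beat the consensus by two forecaster standard deviations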
# ----------------------------------------------------------------------
# remove duplicates that exist for specific releases (data-release lag)
# e.g.
# 12/04/13 | New Home Sales | Sep | 425k
# 12/04/13 | New Home Sales | Oct | 429k
# ----------------------------------------------------------------------
# we do not preserve either duplicate; instead, we drop all duplicated rows
concat_pd = concat_pd.drop_duplicates(subset=['RELEASE_DATE', 'TICKER'], keep=False)
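# note: keep=False discards every member of each duplicated (RELEASE_DATE,
# TICKER) group, so in the example above neither the Sep nor the Oct
# New Home Sales row survives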
# %% Export Files locally to folder
print('\nCleaning of Bloomberg Economic Data completed')
print('\tTime Taken: %.2f minutes' % ((time.time() - start) / 60))
concat_pd.to_csv(os.path.join(os.getcwd(), 'Output', 'bloomberg_economic_releases.csv'),
                 index=False)
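
##########################################################################
# USAGE NOTE
##########################################################################
# Run from the repository root so the relative Input/ and Output/ paths
# resolve, and make sure the Output/ folder exists (pandas' to_csv does
# not create missing directories):
#
#   python main.py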