Skip to content

Commit

Permalink
Second Edition - Alpha
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan-jansen committed Jun 26, 2020
1 parent e951ab0 commit 388c44c
Show file tree
Hide file tree
Showing 331 changed files with 200,239 additions and 424,460 deletions.
100 changes: 76 additions & 24 deletions 01_machine_learning_for_trading/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "code",
"execution_count": 1,
Expand Down Expand Up @@ -113,6 +123,28 @@
"sns.set_style('whitegrid')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def format_time(t):\n",
" \"\"\"Return a formatted time string 'HH:MM:SS\n",
" based on a numeric time() value\"\"\"\n",
" m, s = divmod(t, 60)\n",
" h, m = divmod(m, 60)\n",
" return f'{h:0>2.0f}:{m:0>2.0f}:{s:0>5.2f}'"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -273,8 +305,7 @@
"ExecuteTime": {
"end_time": "2018-12-25T19:06:08.577453Z",
"start_time": "2018-12-25T19:06:08.570117Z"
},
"scrolled": false
}
},
"outputs": [
{
Expand Down Expand Up @@ -1267,8 +1298,7 @@
"ExecuteTime": {
"end_time": "2018-12-25T17:59:34.870288Z",
"start_time": "2018-12-25T17:29:45.640518Z"
},
"scrolled": false
}
},
"outputs": [
{
Expand Down Expand Up @@ -1327,27 +1357,25 @@
" \n",
" # deal with system events\n",
" if message_type == 'S':\n",
" timestamp = int.from_bytes(message.timestamp, byteorder='big')\n",
" seconds = int.from_bytes(message.timestamp, byteorder='big') * 1e-9\n",
" print('\\n', event_codes.get(message.event_code.decode('ascii'), 'Error'))\n",
" print('\\t{0}\\t{1:,.0f}'.format(timedelta(seconds=timestamp * 1e-9),\n",
" message_count))\n",
" print(f'\\t{format_time(seconds)}\\t{message_count:12,.0f}')\n",
" if message.event_code.decode('ascii') == 'C':\n",
" store_messages(messages)\n",
" break\n",
" message_count += 1\n",
" \n",
" if message_count % 2.5e7 == 0: \n",
" t = timedelta(seconds=int.from_bytes(message.timestamp, byteorder='big') * 1e-9)\n",
" d = timedelta(seconds=time() - start)\n",
" print('\\t{t}\\t{message_count:12,.0f}\\t{d}')\n",
"\n",
" if message_count % 2.5e7 == 0:\n",
" seconds = int.from_bytes(message.timestamp, byteorder='big') * 1e-9\n",
" d = format_time(time() - start)\n",
" print(f'\\t{format_time(seconds)}\\t{message_count:12,.0f}\\t{d}')\n",
" res = store_messages(messages)\n",
" if res == 1:\n",
" print(pd.Series(dict(message_type_counter)).sort_values())\n",
" break\n",
" messages.clear()\n",
" \n",
" \n",
"print(timedelta(seconds=time() - start))"
"\n",
"print('Duration:', format_time(time() - start))"
]
},
{
Expand Down Expand Up @@ -1557,8 +1585,7 @@
"ExecuteTime": {
"end_time": "2020-03-27T15:19:06.905421Z",
"start_time": "2020-03-27T15:19:05.149409Z"
},
"scrolled": false
}
},
"outputs": [
{
Expand All @@ -1576,22 +1603,17 @@
"with pd.HDFStore(itch_store) as store:\n",
" stocks = store['R'].loc[:, ['stock_locate', 'stock']]\n",
" trades = store['P'].append(store['Q'].rename(columns={'cross_price': 'price'}), sort=False).merge(stocks)\n",
"\n",
"trades['value'] = trades.shares.mul(trades.price)\n",
"trades['value_share'] = trades.value.div(trades.value.sum())\n",
"\n",
"trade_summary = trades.groupby('stock').value_share.sum().sort_values(ascending=False)\n",
"trade_summary.iloc[:50].plot.bar(figsize=(14, 6), color='darkblue', title='Share of Traded Value')\n",
"\n",
"plt.gca().yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))\n",
"plt.tight_layout()\n",
"sns.despine()\n",
"# plt.savefig('figures/share_of_trade_vol', dpi=300)"
"plt.tight_layout()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down Expand Up @@ -1632,5 +1654,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
}
"nbformat_minor": 4
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -1073,8 +1073,7 @@
"\n",
"plt.legend(handles=[red_patch, blue_patch])\n",
"sns.despine()\n",
"fig.tight_layout()\n",
"fig.savefig('figures/orderbook', dpi=300)"
"fig.tight_layout()"
]
}
],
Expand Down Expand Up @@ -1117,4 +1116,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,7 @@
"tick_bars.price.plot(figsize=(10, 5), \n",
" title='Tick Bars | {} | {}'.format(stock, pd.to_datetime(date).date()), lw=1)\n",
"plt.xlabel('')\n",
"plt.tight_layout()\n",
"plt.savefig('figures/tick_bars', dpi=300);"
"plt.tight_layout();"
]
},
{
Expand Down Expand Up @@ -386,8 +385,7 @@
" fig.autofmt_xdate()\n",
" fig.suptitle(suptitle)\n",
" fig.tight_layout()\n",
" plt.subplots_adjust(top=0.9)\n",
" fig.savefig(f'figures/{fname}', dpi=300);"
" plt.subplots_adjust(top=0.9);"
]
},
{
Expand Down Expand Up @@ -420,7 +418,7 @@
" ohlc = agg_trades.price.ohlc()\n",
" vol = agg_trades.shares.sum().to_frame('vol')\n",
" txn = agg_trades.shares.size().to_frame('txn')\n",
" return pd.concat([ohlc, vwap, vol, txn], axis=1) "
" return pd.concat([ohlc, vwap, vol, txn], axis=1)"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## 01 Working with Market Data: NASDAQ_TotalView-ITCH Order Book
## Working with Market Data: NASDAQ_TotalView-ITCH Order Book

While FIX has a dominant market share, exchanges also offer native protocols. The Nasdaq offers a TotalView ITCH direct data-feed protocol that allows subscribers to track individual orders for equity instruments from placement to execution or cancellation.

Expand Down Expand Up @@ -42,7 +42,7 @@ For each message, the specification lays out the components and their respective
| Price | 32 | 4 | Price (4) | The display price of the new order. Refer to Data Types for field processing notes. |
| Attribution | 36 | 4 | Alpha | Nasdaq Market participant identifier associated with the entered order |

The notebook [01_build_itch_order_book](01_build_itch_order_book.ipynb) contains the code to
The notebooks [01_parse_itch_order_flow_messages](01_parse_itch_order_flow_messages.ipynb), [02_rebuild_nasdaq_order_book](02_rebuild_nasdaq_order_book.ipynb) and [03_normalize_tick_data](03_normalize_tick_data.ipynb) contain the code to
- download NASDAQ Total View sample tick data,
- parse the messages from the binary source data
- reconstruct the order book for a given stock
Expand All @@ -54,8 +54,9 @@ The code has been updated to use the latest NASDAQ sample file dated March 27, 2
Warning: the tick data is around 12GB in size and some processing steps can take several hours on a 4-core i7 CPU with 32GB RAM.

### Regularizing tick data

The trade data is indexed by nanoseconds and is very noisy. The bid-ask bounce, for instance, causes the price to oscillate between the bid and ask prices when trade initiation alternates between buy and sell market orders. To improve the signal-to-noise ratio and the statistical properties of the data, we need to resample and regularize the tick data by aggregating the trading activity.

We typically collect the open (first), low, high, and closing (last) price for the aggregated period, alongside the volume-weighted average price (VWAP), the number of shares traded, and the timestamp associated with the data.

The notebook [02_normalize_tick_data](02_normalize_tick_data.ipynb) illustrates how to normalize noisy tick using time and volume bars that use different aggregation methods.
The notebook [03_normalize_tick_data](03_normalize_tick_data.ipynb) illustrates how to normalize noisy tick data using time and volume bars that use different aggregation methods.
Loading

0 comments on commit 388c44c

Please sign in to comment.