data_loader.py
"""Data loader.
Author: Jinsung Yoon
Contact: [email protected]
----------------------------------------
Loads Google stock dataset with MinMax normalization.
Reference: https://finance.yahoo.com/quote/GOOGL/history?p=GOOGL
"""
# Necessary Packages
import numpy as np
from utils import MinMaxScaler
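MinMaxScaler is imported from the repository's utils module and is not defined in this file. A minimal column-wise min-max scaler consistent with how it is called below might look like the following sketch; this is an assumption about the helper's behavior, and the actual implementation in utils.py may differ.

# Reference sketch only (assumed behavior of utils.MinMaxScaler, not the actual
# helper): scale each column to [0, 1] using that column's own min and max.
def _minmax_scaler_sketch(data):
  numerator = data - np.min(data, 0)
  denominator = np.max(data, 0) - np.min(data, 0)
  # Small constant avoids division by zero for constant columns.
  return numerator / (denominator + 1e-7)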
def data_loader(train_rate=0.8, seq_len=7):
  """Loads Google stock data.

  Args:
    - train_rate: the ratio between training and testing sets
    - seq_len: sequence length

  Returns:
    - train_x: training features
    - train_y: training labels
    - test_x: testing features
    - test_y: testing labels
  """
  # Load data
  ori_data = np.loadtxt('data/google.csv', delimiter=',', skiprows=1)
  # Reverse the time order so the series runs chronologically
  reverse_data = ori_data[::-1]
  # Normalization
  norm_data = MinMaxScaler(reverse_data)

  # Build dataset
  data_x = []
  data_y = []

  for i in range(0, len(norm_data[:, 0]) - seq_len):
    # Previous seq_len observations as features
    temp_x = norm_data[i:i + seq_len, :]
    # Value of the last column at the next time point as the label
    temp_y = norm_data[i + seq_len, [-1]]
    data_x.append(temp_x)
    data_y.append(temp_y)

  data_x = np.asarray(data_x)
  data_y = np.asarray(data_y)

  # Train / test division
  idx = np.random.permutation(len(data_x))
  train_idx = idx[:int(train_rate * len(data_x))]
  test_idx = idx[int(train_rate * len(data_x)):]

  train_x, test_x = data_x[train_idx, :, :], data_x[test_idx, :, :]
  train_y, test_y = data_y[train_idx, :], data_y[test_idx, :]

  return train_x, train_y, test_x, test_y
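A minimal usage sketch, assuming data/google.csv is present with a header row and the target variable in the last column:

if __name__ == '__main__':
  # Smoke test: load the data and report the resulting array shapes.
  train_x, train_y, test_x, test_y = data_loader(train_rate=0.8, seq_len=7)
  # Features are (num_samples, seq_len, num_columns); labels are (num_samples, 1).
  print('train_x:', train_x.shape, 'train_y:', train_y.shape)
  print('test_x:', test_x.shape, 'test_y:', test_y.shape)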