-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhomework3_ymtaye_yyadati_lgangaramaney.py
142 lines (112 loc) · 4.12 KB
/
homework3_ymtaye_yyadati_lgangaramaney.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
## Yared Taye
## Lokesh Gangaramaney
## Yash Yadati
import sys
import numpy as np
np.seterr(all='ignore')
def minibatches(X, y, batchsize):
indices = np.arange(X.shape[0])
np.random.shuffle(indices)
for start_idx in range(0, X.shape[0] - batchsize + 1, batchsize):
ids = indices[start_idx:start_idx + batchsize]
yield X[ids], y[ids]
def format_labels(y):
n = y.shape[0]
Train_y = np.zeros((n, 10))
for i in range(len(y)):
Train_y[i, y[i]] = 1
return Train_y
## Pre Activation Scores = Z = X^T * W
def preactivation_score(X, weight):
Z = np.dot(X , weight)
return Z
## Softmax
def softmax(z):
assert len(z.shape) == 2
s = np.max(z, axis=1)
s = s[:, np.newaxis]
e_x = np.exp(z - s)
div = np.sum(e_x, axis=1)
div = div[:, np.newaxis]
return e_x / div
## -1/n * y * log(y^)
def cross_entropy(X, y, weight):
fce = -np.sum(y * np.log(softmax(preactivation_score(X, weight)))) / y.shape[0]
return fce
def gradient(X, y, weight, alpha):
n = y.shape[0]
diff_y = (softmax(preactivation_score(X, weight)) - y) / n
unregularized = X.T.dot(diff_y)
return unregularized + (alpha * weight), np.mean(diff_y, axis=0)
def SGD( X_tr, ytr, StepSize=0.001, alpha=0.01, batchsize=50, epochs=20):
n, m = X_tr.shape
n, p = ytr.shape
init = np.append(np.random.randn(m,p), np.random.randn(1,p), axis=0)
weight = init[:-1]
bias = init[-1]
for e in range(0, epochs):
for batch in minibatches(X_tr, ytr, batchsize):
X_tr_batch, ytr_batch = batch
dw_dx, db_dx = gradient(X_tr_batch, ytr_batch, weight, alpha)
weight = weight - (StepSize * dw_dx)
bias = bias - (StepSize * db_dx)
init = np.vstack((weight, bias))
return init[:-1]
def predictions(Train_X, Test_X, Train_y, Test_y, aug_w, StepSize, batches, ep, Wd):
predict = softmax(preactivation_score(Test_X, aug_w))
train_cost = cross_entropy(Train_X, Train_y, aug_w)
test_cost = cross_entropy(Test_X, Test_y, aug_w)
result = np.argmax(predict, axis=1) - np.argmax(Test_y, axis=1)
acc = len(result[result == 0]) / len(Test_y)
if acc > 0.80 or test_cost < 0.6:
print("=" * 50)
print(" ACCURACY =", acc)
print("=" * 50)
print("*" * 50)
print('Current Learning Rate: ', StepSize)
print('Batches: ', batches)
print('Epochs', ep)
print('L2-Reg', Wd)
print('FCE for Training Set: ', train_cost)
print('FCE for Testing Set: ', test_cost)
print("*" * 50)
def grid_search():
### Grid Search
LearningRates = [0.0001, 0.001, 0.005, 0.01, 0.1]
BatchSizes = [10, 50, 100, 200, 500, 1000]
WeightDecays = [0.01, 0.02, 0.05, 0.1, 0.5, 2]
num_epochs = [50, 100, 200, 400, 1000]
for ep in num_epochs:
for Wd in WeightDecays:
for batches in BatchSizes:
for lr in LearningRates:
weight = (SGD(X_tr, ytr, alpha=Wd, StepSize=lr, batchsize=batches, epochs=ep))
predictions(X_val, X_te, yval, yte, weight, lr, batches, ep, Wd)
print("Epoch set is done")
def summary():
## USED HYPERPARAMETERS
alpha = 0.02
epislon = 0.005
num_batches = 10
num_epochs = 50
weight = (SGD(X_tr, ytr, alpha=alpha, StepSize=epislon, batchsize=num_batches, epochs=num_epochs))
predictions(X_tr, X_te, ytr, yte, weight, ep=num_epochs, batches=num_batches, StepSize=epislon, Wd=alpha)
##Load Data
X_tr = np.reshape(np.load("fashion_mnist_train_images.npy"), (-1, 28 * 28))
ytr = np.load("fashion_mnist_train_labels.npy")
X_te = np.reshape(np.load("fashion_mnist_test_images.npy"), (-1, 28 * 28))
yte = np.load("fashion_mnist_test_labels.npy")
## Split Dataset into Validation for Grid Search and Train 80:20
X_tr, X_val = X_tr[:48000, :], X_tr[48000:, :]
ytr, yval = ytr[:48000], ytr[48000:]
## Format Labels
ytr = format_labels(ytr)
yte = format_labels(yte)
yval = format_labels(yval)
## Normalize
X_tr *= 1/255
X_val *= (1/255)
X_te *= 1/255
## Run either summary() for best output or grid_search()
#summary()
grid_search()