[feat] add batch normalization in 'FullyConnectedNet' #1
- implement an additional helper layer pair in 'layer_utils.py': 'affine_bn_relu_forward' & 'affine_bn_relu_backward'
jeongjae96 committed Mar 21, 2023
1 parent b159264 commit a7c5a25
Showing 2 changed files with 160 additions and 8 deletions.
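
For orientation (an editorial aside, not part of the commit): with normalization='batchnorm', every hidden layer becomes an affine - batch norm - ReLU block, while the final layer stays a plain affine feeding the softmax loss. A minimal usage sketch, assuming the FullyConnectedNet constructor from the CS231n assignment-2 scaffold (hidden_dims, input_dim, num_classes, normalization, weight_scale, reg, dtype keywords):

import numpy as np
from cs231n.classifiers.fc_net import FullyConnectedNet

# Toy sizes for illustration; the assignment itself feeds CIFAR-10 images.
model = FullyConnectedNet(
    hidden_dims=[100, 100],
    input_dim=3 * 32 * 32,
    num_classes=10,
    normalization="batchnorm",  # routes forward/backward through affine_bn_relu_*
    weight_scale=5e-2,
    reg=0.0,
    dtype=np.float64,           # float64 so the numeric gradient checks below stay tight
)

X = np.random.randn(4, 3 * 32 * 32)
y = np.random.randint(10, size=4)
loss, grads = model.loss(X, y)  # grads now also carries gamma1/beta1 and gamma2/beta2
print(sorted(grads.keys()))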
36 changes: 28 additions & 8 deletions cs231n_2022/assignment2/cs231n/classifiers/fc_net.py
@@ -80,6 +80,10 @@ def __init__(
             self.params['W' + str(i + 1)] = weight_scale * np.random.randn(dims[i], dims[i + 1])
             self.params['b' + str(i + 1)] = np.zeros(dims[i + 1])

+            if self.normalization == 'batchnorm' and i < self.num_layers - 1:  # no batch norm after the last affine layer
+                self.params['gamma' + str(i + 1)] = np.ones(dims[i + 1])
+                self.params['beta' + str(i + 1)] = np.zeros(dims[i + 1])
+
         # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
         ############################################################################
         # END OF YOUR CODE #
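
A note on the hunk above (editorial): scale/shift parameters are only created for the hidden layers, since the last affine layer feeds the softmax directly; the stock scaffold likewise keeps one bn_param dict per hidden layer in self.bn_params. A quick sanity check, continuing the sketch above and assuming that scaffold behaviour:

# Two hidden layers -> exactly two gamma/beta pairs and two bn_param dicts.
assert "gamma1" in model.params and "beta2" in model.params
assert "gamma3" not in model.params      # no batch norm after the last affine layer
assert len(model.bn_params) == 2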
@@ -155,12 +159,23 @@ def loss(self, X, y=None):
         cache = {}
         out = X

+
         for i in range(1, self.num_layers):
-            out, cache[i] = affine_relu_forward(
-                out,
-                self.params['W' + str(i)],
-                self.params['b' + str(i)],
-            )
+            if self.normalization == 'batchnorm':
+                out, cache[i] = affine_bn_relu_forward(
+                    out,
+                    self.params['W' + str(i)],
+                    self.params['b' + str(i)],
+                    self.params['gamma' + str(i)],
+                    self.params['beta' + str(i)],
+                    self.bn_params[i - 1]
+                )
+            else:
+                out, cache[i] = affine_relu_forward(
+                    out,
+                    self.params['W' + str(i)],
+                    self.params['b' + str(i)],
+                )

         scores, cache[self.num_layers] = affine_forward(
             out,
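
One forward-pass detail worth spelling out (editorial): self.bn_params[i - 1] is indexed with i - 1 because the loop counts layers from 1 while bn_params is a plain 0-based list with one entry per hidden layer. At test time, the stock loss() scaffolding sets every bn_param['mode'] to 'test' before this loop, so batch normalization uses its running statistics; a sketch continuing the example above:

# Test-time scoring: with y omitted, loss() returns class scores only, and the
# scaffold has already flipped each bn_param['mode'] to 'test'.
scores = model.loss(X)            # shape (4, 10) for the toy batch above
y_pred = np.argmax(scores, axis=1)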
@@ -196,11 +211,16 @@ def loss(self, X, y=None):
         loss, dscores = softmax_loss(scores, y)
         dout, dW, db = affine_backward(dscores, cache[self.num_layers])

-        grads['W'+str(self.num_layers)] = dW + self.reg * self.params['W' + str(self.num_layers)]
-        grads['b'+str(self.num_layers)] = db
+        grads['W' + str(self.num_layers)] = dW + self.reg * self.params['W' + str(self.num_layers)]
+        grads['b' + str(self.num_layers)] = db

         for i in list(reversed(range(1, self.num_layers))):
-            dout, dW, db = affine_relu_backward(dout, cache[i])
+            if self.normalization == 'batchnorm':
+                dout, dW, db, dgamma, dbeta = affine_bn_relu_backward(dout, cache[i])
+                grads['beta' + str(i)] = dbeta
+                grads['gamma' + str(i)] = dgamma
+            else:
+                dout, dW, db = affine_relu_backward(dout, cache[i])

             grads['W' + str(i)] = dW + self.reg * self.params['W' + str(i)]
             grads['b' + str(i)] = db
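
With both directions in place, the new gamma/beta gradients can be checked the same way the notebook checks W and b. A sketch continuing the earlier example, assuming eval_numerical_gradient from the assignment's cs231n.gradient_check module:

from cs231n.gradient_check import eval_numerical_gradient

def rel_error(x, y):
    # Relative-error helper in the style used throughout the assignment notebooks.
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

loss, grads = model.loss(X, y)
for name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    # gamma*/beta* errors should come out as small as the W*/b* ones (roughly 1e-6 or below).
    print(name, rel_error(grad_num, grads[name]))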
132 changes: 132 additions & 0 deletions cs231n_2022/assignment2/cs231n/layer_utils.py
@@ -0,0 +1,132 @@
from .layers import *
from .fast_layers import *


def affine_relu_forward(x, w, b):
    """Convenience layer that performs an affine transform followed by a ReLU.
    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer
    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    a, fc_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(a)
    cache = (fc_cache, relu_cache)
    return out, cache


def affine_relu_backward(dout, cache):
    """Backward pass for the affine-relu convenience layer.
    """
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db

# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    """Convenience layer: an affine transform, then batch normalization, then a ReLU.
    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer
    - gamma, beta, bn_param: Scale, shift, and parameter dictionary for batch normalization
    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    a, fc_cache = affine_forward(x, w, b)
    bn_out, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    relu_out, relu_cache = relu_forward(bn_out)
    cache = (fc_cache, bn_cache, relu_cache)
    return relu_out, cache


def affine_bn_relu_backward(dout, cache):
    """Backward pass for the affine-bn-relu convenience layer.
    """
    fc_cache, bn_cache, relu_cache = cache
    drelu_out = relu_backward(dout, relu_cache)
    dbn_out, dgamma, dbeta = batchnorm_backward_alt(drelu_out, bn_cache)
    dx, dw, db = affine_backward(dbn_out, fc_cache)
    return dx, dw, db, dgamma, dbeta


# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
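
The composite layer pair above can also be checked on its own, mirroring how the assignment notebook exercises the other sandwich layers. A sketch, assuming eval_numerical_gradient_array from the assignment's cs231n.gradient_check module and train-mode batch statistics:

import numpy as np
from cs231n.layer_utils import affine_bn_relu_forward, affine_bn_relu_backward
from cs231n.gradient_check import eval_numerical_gradient_array

np.random.seed(231)
x = np.random.randn(6, 5)
w = np.random.randn(5, 4)
b = np.random.randn(4)
gamma = np.random.randn(4)
beta = np.random.randn(4)
dout = np.random.randn(6, 4)
bn_param = {"mode": "train"}

out, cache = affine_bn_relu_forward(x, w, b, gamma, beta, bn_param)
dx, dw, db, dgamma, dbeta = affine_bn_relu_backward(dout, cache)

# Compare the analytic dx against a numeric gradient of the forward pass;
# the same pattern works for dw, db, dgamma, and dbeta.
fx = lambda x: affine_bn_relu_forward(x, w, b, gamma, beta, bn_param)[0]
dx_num = eval_numerical_gradient_array(fx, x, dout)
print("dx error:", np.max(np.abs(dx - dx_num)))  # expect a small value (~1e-9); ReLU kinks can occasionally inflate it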

def conv_relu_forward(x, w, b, conv_param):
    """A convenience layer that performs a convolution followed by a ReLU.
    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer
    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    a, conv_cache = conv_forward_fast(x, w, b, conv_param)
    out, relu_cache = relu_forward(a)
    cache = (conv_cache, relu_cache)
    return out, cache


def conv_relu_backward(dout, cache):
    """Backward pass for the conv-relu convenience layer.
    """
    conv_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = conv_backward_fast(da, conv_cache)
    return dx, dw, db


def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param):
    """Convenience layer that performs a convolution, a batch normalization, and a ReLU.
    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer
    - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift
      parameters for batch normalization.
    - bn_param: Dictionary of parameters for batch normalization.
    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    a, conv_cache = conv_forward_fast(x, w, b, conv_param)
    an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(an)
    cache = (conv_cache, bn_cache, relu_cache)
    return out, cache


def conv_bn_relu_backward(dout, cache):
    """Backward pass for the conv-bn-relu convenience layer.
    """
    conv_cache, bn_cache, relu_cache = cache
    dan = relu_backward(dout, relu_cache)
    da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache)
    dx, dw, db = conv_backward_fast(da, conv_cache)
    return dx, dw, db, dgamma, dbeta


def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
    """Convenience layer that performs a convolution, a ReLU, and a pool.
    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer
    - pool_param: Parameters for the pooling layer
    Returns a tuple of:
    - out: Output from the pooling layer
    - cache: Object to give to the backward pass
    """
    a, conv_cache = conv_forward_fast(x, w, b, conv_param)
    s, relu_cache = relu_forward(a)
    out, pool_cache = max_pool_forward_fast(s, pool_param)
    cache = (conv_cache, relu_cache, pool_cache)
    return out, cache


def conv_relu_pool_backward(dout, cache):
    """Backward pass for the conv-relu-pool convenience layer.
    """
    conv_cache, relu_cache, pool_cache = cache
    ds = max_pool_backward_fast(dout, pool_cache)
    da = relu_backward(ds, relu_cache)
    dx, dw, db = conv_backward_fast(da, conv_cache)
    return dx, dw, db
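
Although the convolutional helpers are unchanged scaffold code, a shape sketch may help when reading them; it assumes the fast layers' compiled Cython extension (im2col_cython) is available, since conv_forward_fast depends on it:

import numpy as np
from cs231n.layer_utils import conv_relu_pool_forward

x = np.random.randn(2, 3, 32, 32)                 # (N, C, H, W)
w = np.random.randn(8, 3, 3, 3)                   # 8 filters of size 3x3 over 3 channels
b = np.zeros(8)
conv_param = {"stride": 1, "pad": 1}              # keeps the 32x32 spatial size
pool_param = {"pool_height": 2, "pool_width": 2, "stride": 2}

out, cache = conv_relu_pool_forward(x, w, b, conv_param, pool_param)
print(out.shape)                                  # (2, 8, 16, 16): conv preserves 32x32, 2x2 pooling halves it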
