From a7c5a2538df00231d8bc5b5ec0554d8733f0fe62 Mon Sep 17 00:00:00 2001
From: JeongJae Park
Date: Tue, 21 Mar 2023 17:55:59 +0900
Subject: [PATCH] [feat] add batch normalization in 'FullyConnectedNet' #1

- implement an additional helper layer in 'layer_utils.py':
  'affine_bn_relu_forward' & 'affine_bn_relu_backward'
---
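Quick way to exercise the new batchnorm path end to end (a sketch, not part of
the commit; the 'FullyConnectedNet' constructor keywords other than
'normalization' are assumed to follow the assignment 2 defaults):

    import numpy as np
    from cs231n.classifiers.fc_net import FullyConnectedNet

    np.random.seed(0)
    N, D, C = 8, 15, 10                        # toy batch, input dim, classes
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=N)

    model = FullyConnectedNet(
        [20, 30],                              # two hidden layers
        input_dim=D,
        num_classes=C,
        normalization='batchnorm',             # turns on the affine_bn_relu path
        weight_scale=5e-2,
        reg=0.0,
    )

    # The patch adds per-hidden-layer scale/shift parameters.
    assert 'gamma1' in model.params and 'beta2' in model.params

    loss, grads = model.loss(X, y)             # train-time forward/backward
    print(loss, sorted(grads.keys()))
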
 .../assignment2/cs231n/classifiers/fc_net.py  |  36 +++--
 cs231n_2022/assignment2/cs231n/layer_utils.py | 132 ++++++++++++++++++
 2 files changed, 160 insertions(+), 8 deletions(-)
 create mode 100644 cs231n_2022/assignment2/cs231n/layer_utils.py

diff --git a/cs231n_2022/assignment2/cs231n/classifiers/fc_net.py b/cs231n_2022/assignment2/cs231n/classifiers/fc_net.py
index 0a98f41..5b65c75 100644
--- a/cs231n_2022/assignment2/cs231n/classifiers/fc_net.py
+++ b/cs231n_2022/assignment2/cs231n/classifiers/fc_net.py
@@ -80,6 +80,10 @@ def __init__(
             self.params['W' + str(i + 1)] = weight_scale * np.random.randn(dims[i], dims[i + 1])
             self.params['b' + str(i + 1)] = np.zeros(dims[i + 1])
 
+            if self.normalization == 'batchnorm' and i < self.num_layers - 1:
+                self.params['gamma' + str(i + 1)] = np.ones(dims[i + 1])
+                self.params['beta' + str(i + 1)] = np.zeros(dims[i + 1])
+
         # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
         ############################################################################
         #                             END OF YOUR CODE                             #
@@ -155,12 +159,23 @@ def loss(self, X, y=None):
 
         cache = {}
         out = X
+
         for i in range(1, self.num_layers):
-            out, cache[i] = affine_relu_forward(
-                out,
-                self.params['W' + str(i)],
-                self.params['b' + str(i)],
-            )
+            if self.normalization == 'batchnorm':
+                out, cache[i] = affine_bn_relu_forward(
+                    out,
+                    self.params['W' + str(i)],
+                    self.params['b' + str(i)],
+                    self.params['gamma' + str(i)],
+                    self.params['beta' + str(i)],
+                    self.bn_params[i - 1]
+                )
+            else:
+                out, cache[i] = affine_relu_forward(
+                    out,
+                    self.params['W' + str(i)],
+                    self.params['b' + str(i)],
+                )
 
         scores, cache[self.num_layers] = affine_forward(
             out,
@@ -196,11 +211,16 @@ def loss(self, X, y=None):
         loss, dscores = softmax_loss(scores, y)
 
         dout, dW, db = affine_backward(dscores, cache[self.num_layers])
-        grads['W'+str(self.num_layers)] = dW + self.reg * self.params['W' + str(self.num_layers)]
-        grads['b'+str(self.num_layers)] = db
+        grads['W' + str(self.num_layers)] = dW + self.reg * self.params['W' + str(self.num_layers)]
+        grads['b' + str(self.num_layers)] = db
 
         for i in list(reversed(range(1, self.num_layers))):
-            dout, dW, db = affine_relu_backward(dout, cache[i])
+            if self.normalization == 'batchnorm':
+                dout, dW, db, dgamma, dbeta = affine_bn_relu_backward(dout, cache[i])
+                grads['beta' + str(i)] = dbeta
+                grads['gamma' + str(i)] = dgamma
+            else:
+                dout, dW, db = affine_relu_backward(dout, cache[i])
             grads['W' + str(i)] = dW + self.reg * self.params['W' + str(i)]
             grads['b' + str(i)] = db
 
diff --git a/cs231n_2022/assignment2/cs231n/layer_utils.py b/cs231n_2022/assignment2/cs231n/layer_utils.py
new file mode 100644
index 0000000..fa4b90e
--- /dev/null
+++ b/cs231n_2022/assignment2/cs231n/layer_utils.py
@@ -0,0 +1,132 @@
+from .layers import *
+from .fast_layers import *
+
+
+def affine_relu_forward(x, w, b):
+    """Convenience layer that performs an affine transform followed by a ReLU.
+
+    Inputs:
+    - x: Input to the affine layer
+    - w, b: Weights for the affine layer
+
+    Returns a tuple of:
+    - out: Output from the ReLU
+    - cache: Object to give to the backward pass
+    """
+    a, fc_cache = affine_forward(x, w, b)
+    out, relu_cache = relu_forward(a)
+    cache = (fc_cache, relu_cache)
+    return out, cache
+
+def affine_relu_backward(dout, cache):
+    """Backward pass for the affine-relu convenience layer.
+    """
+    fc_cache, relu_cache = cache
+    da = relu_backward(dout, relu_cache)
+    dx, dw, db = affine_backward(da, fc_cache)
+    return dx, dw, db
+
+# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
+    a, fc_cache = affine_forward(x, w, b)
+    bn_out, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
+    relu_out, relu_cache = relu_forward(bn_out)
+    cache = (fc_cache, bn_cache, relu_cache)
+    return relu_out, cache
+
+def affine_bn_relu_backward(dout, cache):
+    fc_cache, bn_cache, relu_cache = cache
+    drelu_out = relu_backward(dout, relu_cache)
+    dbn_out, dgamma, dbeta = batchnorm_backward_alt(drelu_out, bn_cache)
+    dx, dw, db = affine_backward(dbn_out, fc_cache)
+    return dx, dw, db, dgamma, dbeta
+
+
+# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+def conv_relu_forward(x, w, b, conv_param):
+    """A convenience layer that performs a convolution followed by a ReLU.
+
+    Inputs:
+    - x: Input to the convolutional layer
+    - w, b, conv_param: Weights and parameters for the convolutional layer
+
+    Returns a tuple of:
+    - out: Output from the ReLU
+    - cache: Object to give to the backward pass
+    """
+    a, conv_cache = conv_forward_fast(x, w, b, conv_param)
+    out, relu_cache = relu_forward(a)
+    cache = (conv_cache, relu_cache)
+    return out, cache
+
+
+def conv_relu_backward(dout, cache):
+    """Backward pass for the conv-relu convenience layer.
+    """
+    conv_cache, relu_cache = cache
+    da = relu_backward(dout, relu_cache)
+    dx, dw, db = conv_backward_fast(da, conv_cache)
+    return dx, dw, db
+
+
+def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param):
+    """Convenience layer that performs a convolution, a batch normalization, and a ReLU.
+
+    Inputs:
+    - x: Input to the convolutional layer
+    - w, b, conv_param: Weights and parameters for the convolutional layer
+    - pool_param: Parameters for the pooling layer
+    - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift
+      parameters for batch normalization.
+    - bn_param: Dictionary of parameters for batch normalization.
+
+    Returns a tuple of:
+    - out: Output from the pooling layer
+    - cache: Object to give to the backward pass
+    """
+    a, conv_cache = conv_forward_fast(x, w, b, conv_param)
+    an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param)
+    out, relu_cache = relu_forward(an)
+    cache = (conv_cache, bn_cache, relu_cache)
+    return out, cache
+
+
+def conv_bn_relu_backward(dout, cache):
+    """Backward pass for the conv-bn-relu convenience layer.
+    """
+    conv_cache, bn_cache, relu_cache = cache
+    dan = relu_backward(dout, relu_cache)
+    da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache)
+    dx, dw, db = conv_backward_fast(da, conv_cache)
+    return dx, dw, db, dgamma, dbeta
+
+
+def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
+    """Convenience layer that performs a convolution, a ReLU, and a pool.
+
+    Inputs:
+    - x: Input to the convolutional layer
+    - w, b, conv_param: Weights and parameters for the convolutional layer
+    - pool_param: Parameters for the pooling layer
+
+    Returns a tuple of:
+    - out: Output from the pooling layer
+    - cache: Object to give to the backward pass
+    """
+    a, conv_cache = conv_forward_fast(x, w, b, conv_param)
+    s, relu_cache = relu_forward(a)
+    out, pool_cache = max_pool_forward_fast(s, pool_param)
+    cache = (conv_cache, relu_cache, pool_cache)
+    return out, cache
+
+
+def conv_relu_pool_backward(dout, cache):
+    """Backward pass for the conv-relu-pool convenience layer.
+    """
+    conv_cache, relu_cache, pool_cache = cache
+    ds = max_pool_backward_fast(dout, pool_cache)
+    da = relu_backward(ds, relu_cache)
+    dx, dw, db = conv_backward_fast(da, conv_cache)
+    return dx, dw, db
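
Note (below the diff, not part of it): a sketch of how 'affine_bn_relu_forward'
and 'affine_bn_relu_backward' could be sanity-checked numerically. It assumes
the assignment's cs231n.gradient_check.eval_numerical_gradient_array helper and
a train-mode bn_param dict; relative errors on the order of 1e-8 to 1e-6 would
be the expected outcome.

    import numpy as np
    from cs231n.layer_utils import affine_bn_relu_forward, affine_bn_relu_backward
    from cs231n.gradient_check import eval_numerical_gradient_array

    np.random.seed(0)
    x = np.random.randn(4, 6)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    gamma = np.random.randn(5)
    beta = np.random.randn(5)
    bn_param = {'mode': 'train'}
    dout = np.random.randn(4, 5)

    out, cache = affine_bn_relu_forward(x, w, b, gamma, beta, bn_param)
    dx, dw, db, dgamma, dbeta = affine_bn_relu_backward(dout, cache)

    # Analytic dx vs. numeric dx (dgamma/dbeta can be checked the same way).
    fx = lambda xx: affine_bn_relu_forward(xx, w, b, gamma, beta, bn_param)[0]
    dx_num = eval_numerical_gradient_array(fx, x, dout)
    rel_err = np.max(np.abs(dx - dx_num) / np.maximum(1e-8, np.abs(dx) + np.abs(dx_num)))
    print(rel_err)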