From a7c5a2538df00231d8bc5b5ec0554d8733f0fe62 Mon Sep 17 00:00:00 2001
From: JeongJae Park
Date: Tue, 21 Mar 2023 17:55:59 +0900
Subject: [PATCH] [feat] add batch normalization in 'FullyConnectedNet' #1

- implement an additional helper layer in 'layer_utils.py':
  'affine_bn_relu_forward' & 'affine_bn_relu_backward'
---
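Quick way to exercise the new batchnorm path end to end (a sketch, not part of
the commit; the 'FullyConnectedNet' constructor keywords other than
'normalization' are assumed to follow the assignment 2 defaults):

    import numpy as np
    from cs231n.classifiers.fc_net import FullyConnectedNet

    np.random.seed(0)
    N, D, C = 8, 15, 10                        # toy batch, input dim, classes
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=N)

    model = FullyConnectedNet(
        [20, 30],                              # two hidden layers
        input_dim=D,
        num_classes=C,
        normalization='batchnorm',             # turns on the affine_bn_relu path
        weight_scale=5e-2,
        reg=0.0,
    )

    # The patch adds per-hidden-layer scale/shift parameters.
    assert 'gamma1' in model.params and 'beta2' in model.params

    loss, grads = model.loss(X, y)             # train-time forward/backward
    print(loss, sorted(grads.keys()))
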
 .../assignment2/cs231n/classifiers/fc_net.py  |  36 +++--
 cs231n_2022/assignment2/cs231n/layer_utils.py | 132 ++++++++++++++++++
 2 files changed, 160 insertions(+), 8 deletions(-)
 create mode 100644 cs231n_2022/assignment2/cs231n/layer_utils.py

diff --git a/cs231n_2022/assignment2/cs231n/classifiers/fc_net.py b/cs231n_2022/assignment2/cs231n/classifiers/fc_net.py
index 0a98f41..5b65c75 100644
--- a/cs231n_2022/assignment2/cs231n/classifiers/fc_net.py
+++ b/cs231n_2022/assignment2/cs231n/classifiers/fc_net.py
@@ -80,6 +80,10 @@ def __init__(
             self.params['W' + str(i + 1)] = weight_scale * np.random.randn(dims[i], dims[i + 1])
             self.params['b' + str(i + 1)] = np.zeros(dims[i + 1])
 
+            if self.normalization == 'batchnorm' and i < self.num_layers - 1:
+                self.params['gamma' + str(i + 1)] = np.ones(dims[i + 1])
+                self.params['beta' + str(i + 1)] = np.zeros(dims[i + 1])
+
         # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
         ############################################################################
         #                             END OF YOUR CODE                             #
@@ -155,12 +159,23 @@ def loss(self, X, y=None):
 
         cache = {}
         out = X
+
         for i in range(1, self.num_layers):
-            out, cache[i] = affine_relu_forward(
-                out,
-                self.params['W' + str(i)],
-                self.params['b' + str(i)],
-            )
+            if self.normalization == 'batchnorm':
+                out, cache[i] = affine_bn_relu_forward(
+                    out,
+                    self.params['W' + str(i)],
+                    self.params['b' + str(i)],
+                    self.params['gamma' + str(i)],
+                    self.params['beta' + str(i)],
+                    self.bn_params[i - 1]
+                )
+            else:
+                out, cache[i] = affine_relu_forward(
+                    out,
+                    self.params['W' + str(i)],
+                    self.params['b' + str(i)],
+                )
 
         scores, cache[self.num_layers] = affine_forward(
             out,
@@ -196,11 +211,16 @@ def loss(self, X, y=None):
         loss, dscores = softmax_loss(scores, y)
 
         dout, dW, db = affine_backward(dscores, cache[self.num_layers])
-        grads['W'+str(self.num_layers)] = dW + self.reg * self.params['W' + str(self.num_layers)]
-        grads['b'+str(self.num_layers)] = db
+        grads['W' + str(self.num_layers)] = dW + self.reg * self.params['W' + str(self.num_layers)]
+        grads['b' + str(self.num_layers)] = db
 
         for i in list(reversed(range(1, self.num_layers))):
-            dout, dW, db = affine_relu_backward(dout, cache[i])
+            if self.normalization == 'batchnorm':
+                dout, dW, db, dgamma, dbeta = affine_bn_relu_backward(dout, cache[i])
+                grads['beta' + str(i)] = dbeta
+                grads['gamma' + str(i)] = dgamma
+            else:
+                dout, dW, db = affine_relu_backward(dout, cache[i])
             grads['W' + str(i)] = dW + self.reg * self.params['W' + str(i)]
             grads['b' + str(i)] = db
 
diff --git a/cs231n_2022/assignment2/cs231n/layer_utils.py b/cs231n_2022/assignment2/cs231n/layer_utils.py
new file mode 100644
index 0000000..fa4b90e
--- /dev/null
+++ b/cs231n_2022/assignment2/cs231n/layer_utils.py
@@ -0,0 +1,132 @@
+from .layers import *
+from .fast_layers import *
+
+
+def affine_relu_forward(x, w, b):
+    """Convenience layer that performs an affine transform followed by a ReLU.
+
+    Inputs:
+    - x: Input to the affine layer
+    - w, b: Weights for the affine layer
+
+    Returns a tuple of:
+    - out: Output from the ReLU
+    - cache: Object to give to the backward pass
+    """
+    a, fc_cache = affine_forward(x, w, b)
+    out, relu_cache = relu_forward(a)
+    cache = (fc_cache, relu_cache)
+    return out, cache
+
+def affine_relu_backward(dout, cache):
+    """Backward pass for the affine-relu convenience layer.
+    """
+    fc_cache, relu_cache = cache
+    da = relu_backward(dout, relu_cache)
+    dx, dw, db = affine_backward(da, fc_cache)
+    return dx, dw, db
+
+# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
+    a, fc_cache = affine_forward(x, w, b)
+    bn_out, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
+    relu_out, relu_cache = relu_forward(bn_out)
+    cache = (fc_cache, bn_cache, relu_cache)
+    return relu_out, cache
+
+def affine_bn_relu_backward(dout, cache):
+    fc_cache, bn_cache, relu_cache = cache
+    drelu_out = relu_backward(dout, relu_cache)
+    dbn_out, dgamma, dbeta = batchnorm_backward_alt(drelu_out, bn_cache)
+    dx, dw, db = affine_backward(dbn_out, fc_cache)
+    return dx, dw, db, dgamma, dbeta
+
+
+# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+def conv_relu_forward(x, w, b, conv_param):
+    """A convenience layer that performs a convolution followed by a ReLU.
+
+    Inputs:
+    - x: Input to the convolutional layer
+    - w, b, conv_param: Weights and parameters for the convolutional layer
+
+    Returns a tuple of:
+    - out: Output from the ReLU
+    - cache: Object to give to the backward pass
+    """
+    a, conv_cache = conv_forward_fast(x, w, b, conv_param)
+    out, relu_cache = relu_forward(a)
+    cache = (conv_cache, relu_cache)
+    return out, cache
+
+
+def conv_relu_backward(dout, cache):
+    """Backward pass for the conv-relu convenience layer.
+    """
+    conv_cache, relu_cache = cache
+    da = relu_backward(dout, relu_cache)
+    dx, dw, db = conv_backward_fast(da, conv_cache)
+    return dx, dw, db
+
+
+def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param):
+    """Convenience layer that performs a convolution, a batch normalization, and a ReLU.
+
+    Inputs:
+    - x: Input to the convolutional layer
+    - w, b, conv_param: Weights and parameters for the convolutional layer
+    - pool_param: Parameters for the pooling layer
+    - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift
+      parameters for batch normalization.
+    - bn_param: Dictionary of parameters for batch normalization.
+
+    Returns a tuple of:
+    - out: Output from the pooling layer
+    - cache: Object to give to the backward pass
+    """
+    a, conv_cache = conv_forward_fast(x, w, b, conv_param)
+    an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param)
+    out, relu_cache = relu_forward(an)
+    cache = (conv_cache, bn_cache, relu_cache)
+    return out, cache
+
+
+def conv_bn_relu_backward(dout, cache):
+    """Backward pass for the conv-bn-relu convenience layer.
+    """
+    conv_cache, bn_cache, relu_cache = cache
+    dan = relu_backward(dout, relu_cache)
+    da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache)
+    dx, dw, db = conv_backward_fast(da, conv_cache)
+    return dx, dw, db, dgamma, dbeta
+
+
+def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
+    """Convenience layer that performs a convolution, a ReLU, and a pool.
+
+    Inputs:
+    - x: Input to the convolutional layer
+    - w, b, conv_param: Weights and parameters for the convolutional layer
+    - pool_param: Parameters for the pooling layer
+
+    Returns a tuple of:
+    - out: Output from the pooling layer
+    - cache: Object to give to the backward pass
+    """
+    a, conv_cache = conv_forward_fast(x, w, b, conv_param)
+    s, relu_cache = relu_forward(a)
+    out, pool_cache = max_pool_forward_fast(s, pool_param)
+    cache = (conv_cache, relu_cache, pool_cache)
+    return out, cache
+
+
+def conv_relu_pool_backward(dout, cache):
+    """Backward pass for the conv-relu-pool convenience layer.
+    """
+    conv_cache, relu_cache, pool_cache = cache
+    ds = max_pool_backward_fast(dout, pool_cache)
+    da = relu_backward(ds, relu_cache)
+    dx, dw, db = conv_backward_fast(da, conv_cache)
+    return dx, dw, db
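
Note (below the diff, not part of it): a sketch of how 'affine_bn_relu_forward'
and 'affine_bn_relu_backward' could be sanity-checked numerically. It assumes
the assignment's cs231n.gradient_check.eval_numerical_gradient_array helper and
a train-mode bn_param dict; relative errors on the order of 1e-8 to 1e-6 would
be the expected outcome.

    import numpy as np
    from cs231n.layer_utils import affine_bn_relu_forward, affine_bn_relu_backward
    from cs231n.gradient_check import eval_numerical_gradient_array

    np.random.seed(0)
    x = np.random.randn(4, 6)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    gamma = np.random.randn(5)
    beta = np.random.randn(5)
    bn_param = {'mode': 'train'}
    dout = np.random.randn(4, 5)

    out, cache = affine_bn_relu_forward(x, w, b, gamma, beta, bn_param)
    dx, dw, db, dgamma, dbeta = affine_bn_relu_backward(dout, cache)

    # Analytic dx vs. numeric dx (dgamma/dbeta can be checked the same way).
    fx = lambda xx: affine_bn_relu_forward(xx, w, b, gamma, beta, bn_param)[0]
    dx_num = eval_numerical_gradient_array(fx, x, dout)
    rel_err = np.max(np.abs(dx - dx_num) / np.maximum(1e-8, np.abs(dx) + np.abs(dx_num)))
    print(rel_err)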