From cbd9798447ec5832b93ca9acdd462bf397c9f2d0 Mon Sep 17 00:00:00 2001
From: JJ
Date: Thu, 9 Feb 2023 05:56:31 +0900
Subject: [PATCH] [feat] implement 'sgd_momentum' function #1

---
 cs231n_2022/assignment2/cs231n/optim.py | 163 ++++++++++++++++++++++++
 1 file changed, 163 insertions(+)
 create mode 100644 cs231n_2022/assignment2/cs231n/optim.py

diff --git a/cs231n_2022/assignment2/cs231n/optim.py b/cs231n_2022/assignment2/cs231n/optim.py
new file mode 100644
index 0000000..c7be715
--- /dev/null
+++ b/cs231n_2022/assignment2/cs231n/optim.py
@@ -0,0 +1,163 @@
+import numpy as np
+
+"""
+This file implements various first-order update rules that are commonly used
+for training neural networks. Each update rule accepts current weights and the
+gradient of the loss with respect to those weights and produces the next set of
+weights. Each update rule has the same interface:
+
+def update(w, dw, config=None):
+
+Inputs:
+  - w: A numpy array giving the current weights.
+  - dw: A numpy array of the same shape as w giving the gradient of the
+    loss with respect to w.
+  - config: A dictionary containing hyperparameter values such as learning
+    rate, momentum, etc. If the update rule requires caching values over many
+    iterations, then config will also hold these cached values.
+
+Returns:
+  - next_w: The next point after the update.
+  - config: The config dictionary to be passed to the next iteration of the
+    update rule.
+
+NOTE: For most update rules, the default learning rate will probably not
+perform well; however, the default values of the other hyperparameters should
+work well for a variety of different problems.
+
+For efficiency, update rules may perform in-place updates, mutating w and
+setting next_w equal to w.
+"""
+
+
+def sgd(w, dw, config=None):
+    """
+    Performs vanilla stochastic gradient descent.
+
+    config format:
+    - learning_rate: Scalar learning rate.
+    """
+    if config is None:
+        config = {}
+    config.setdefault("learning_rate", 1e-2)
+
+    w -= config["learning_rate"] * dw
+    return w, config
+
+
+def sgd_momentum(w, dw, config=None):
+    """
+    Performs stochastic gradient descent with momentum.
+
+    config format:
+    - learning_rate: Scalar learning rate.
+    - momentum: Scalar between 0 and 1 giving the momentum value.
+      Setting momentum = 0 reduces to sgd.
+    - velocity: A numpy array of the same shape as w and dw used to store a
+      moving average of the gradients.
+    """
+    if config is None:
+        config = {}
+    config.setdefault("learning_rate", 1e-2)
+    config.setdefault("momentum", 0.9)
+    v = config.get("velocity", np.zeros_like(w))
+
+    next_w = None
+    ###########################################################################
+    # TODO: Implement the momentum update formula. Store the updated value in #
+    # the next_w variable. You should also use and update the velocity v.     #
+    ###########################################################################
+    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+    v = config["momentum"] * v - config["learning_rate"] * dw
+    next_w = w + v
+
+    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+    ###########################################################################
+    #                             END OF YOUR CODE                            #
+    ###########################################################################
+    config["velocity"] = v
+
+    return next_w, config
+
+
+def rmsprop(w, dw, config=None):
+    """
+    Uses the RMSProp update rule, which uses a moving average of squared
+    gradient values to set adaptive per-parameter learning rates.
+
+    config format:
+    - learning_rate: Scalar learning rate.
+    - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared
+      gradient cache.
+    - epsilon: Small scalar used for smoothing to avoid dividing by zero.
+    - cache: Moving average of second moments of gradients.
+    """
+    if config is None:
+        config = {}
+    config.setdefault("learning_rate", 1e-2)
+    config.setdefault("decay_rate", 0.99)
+    config.setdefault("epsilon", 1e-8)
+    config.setdefault("cache", np.zeros_like(w))
+
+    next_w = None
+    ###########################################################################
+    # TODO: Implement the RMSprop update formula, storing the next value of w #
+    # in the next_w variable. Don't forget to update the cache value stored   #
+    # in config['cache'].                                                     #
+    ###########################################################################
+    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+    pass
+
+    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+    ###########################################################################
+    #                             END OF YOUR CODE                            #
+    ###########################################################################
+
+    return next_w, config
+
+
+def adam(w, dw, config=None):
+    """
+    Uses the Adam update rule, which incorporates moving averages of both the
+    gradient and its square and a bias correction term.
+
+    config format:
+    - learning_rate: Scalar learning rate.
+    - beta1: Decay rate for moving average of first moment of gradient.
+    - beta2: Decay rate for moving average of second moment of gradient.
+    - epsilon: Small scalar used for smoothing to avoid dividing by zero.
+    - m: Moving average of gradient.
+    - v: Moving average of squared gradient.
+    - t: Iteration number.
+    """
+    if config is None:
+        config = {}
+    config.setdefault("learning_rate", 1e-3)
+    config.setdefault("beta1", 0.9)
+    config.setdefault("beta2", 0.999)
+    config.setdefault("epsilon", 1e-8)
+    config.setdefault("m", np.zeros_like(w))
+    config.setdefault("v", np.zeros_like(w))
+    config.setdefault("t", 0)
+
+    next_w = None
+    ###########################################################################
+    # TODO: Implement the Adam update formula, storing the next value of w in #
+    # the next_w variable. Don't forget to update the m, v, and t variables   #
+    # stored in config.                                                       #
+    #                                                                         #
+    # NOTE: In order to match the reference output, please modify t _before_  #
+    # using it in any calculations.                                           #
+    ###########################################################################
+    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+    pass
+
+    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+    ###########################################################################
+    #                             END OF YOUR CODE                            #
+    ###########################################################################
+
+    return next_w, config
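
Note on the rmsprop stub: this commit only implements sgd_momentum and leaves the RMSProp TODO as a pass. For reference, below is a minimal sketch of the standard RMSProp rule that the docstring describes, keyed to the same config hyperparameters. The name rmsprop_sketch is hypothetical and none of this code is part of the patch; it is not verified against the assignment's reference output.

import numpy as np


def rmsprop_sketch(w, dw, config=None):
    """Hypothetical completion of the rmsprop stub above; not part of this patch."""
    if config is None:
        config = {}
    config.setdefault("learning_rate", 1e-2)
    config.setdefault("decay_rate", 0.99)
    config.setdefault("epsilon", 1e-8)
    config.setdefault("cache", np.zeros_like(w))

    # Exponentially decayed moving average of squared gradients.
    config["cache"] = (
        config["decay_rate"] * config["cache"]
        + (1 - config["decay_rate"]) * dw * dw
    )
    # Per-parameter step scaled by the root of the cache; epsilon avoids division by zero.
    next_w = w - config["learning_rate"] * dw / (np.sqrt(config["cache"]) + config["epsilon"])

    return next_w, config

Placing epsilon outside the square root is one common convention; the assignment's expected placement should be checked against its gradient checks.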
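
Similarly, the adam stub is left as a TODO. Below is a minimal sketch of the standard Adam rule the docstring describes, incrementing t before the bias correction as the NOTE in the patch asks. The name adam_sketch is hypothetical and this code is not part of the patch.

import numpy as np


def adam_sketch(w, dw, config=None):
    """Hypothetical completion of the adam stub above; not part of this patch."""
    if config is None:
        config = {}
    config.setdefault("learning_rate", 1e-3)
    config.setdefault("beta1", 0.9)
    config.setdefault("beta2", 0.999)
    config.setdefault("epsilon", 1e-8)
    config.setdefault("m", np.zeros_like(w))
    config.setdefault("v", np.zeros_like(w))
    config.setdefault("t", 0)

    # Update t before using it, as the NOTE in the TODO block requires.
    config["t"] += 1
    t = config["t"]

    # Moving averages of the gradient and of its elementwise square.
    config["m"] = config["beta1"] * config["m"] + (1 - config["beta1"]) * dw
    config["v"] = config["beta2"] * config["v"] + (1 - config["beta2"]) * dw * dw

    # Bias-corrected first and second moment estimates.
    m_hat = config["m"] / (1 - config["beta1"] ** t)
    v_hat = config["v"] / (1 - config["beta2"] ** t)

    next_w = w - config["learning_rate"] * m_hat / (np.sqrt(v_hat) + config["epsilon"])

    return next_w, config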