From b8a6b90e5f9adb1319fde4e219b46931847bd345 Mon Sep 17 00:00:00 2001 From: Daniel Lowell Date: Fri, 23 Mar 2018 12:50:58 -0500 Subject: [PATCH 01/12] First round. --- doc/src/releasenotes.md | 36 ++++++++++++++++++++++++++++++++++++ include/miopen/miopen.h | 22 +++++++++++++--------- src/ocl/rnnocl.cpp | 2 +- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/doc/src/releasenotes.md b/doc/src/releasenotes.md index 14af3fcc4f..5c06040eff 100644 --- a/doc/src/releasenotes.md +++ b/doc/src/releasenotes.md @@ -1,6 +1,42 @@ ## MIOpen Release notes +### 03/30/2018 [ 1.3.0 ] + +Notes: + +This release contain performance enhancements and bug fixes to multiple parts of the MIOpen library. +16-bit floating point (fp16) support has been added for most kernels, with the exception of RNN. + +Changed: + +- Added 2 new API for RNNs +- Added in support for uninitialized hidden states and nullptr outputs in RNNs +- Added new convolutions for 1x1 (Jing Zhang, this went in correct?)? +- Added support for Set and Scale operations for strided tensors with dimensions 1,2,3,4,5 +- Added the transpose + GEMM algorithm for 1x1 convolution (forward and backward data) +- Added fp16 support for all layers except RNNs +- Added the transpose + GEMM algorithm for 1x1 convolution (forward and backward data) +- Improved over MIOpen layer and operations' performance (I removed the host side claim) +- Improved Batch Normalization performance +- Improved RNN performance +- Fixed logic issues in get and set layer functions and related w_supertensor test +- Fixed hang in batch norm with batch sizes greater than 256 +- Fixed logic issues in get and set layer functions and related w_supertensor test +- Fixed various RNN bugs + + +Known Issues: + +- RNNs do not support fp16 + + +### 03/08/2018 [ 1.2.1 ] + +Notes: + +- This release adds support for ROCm 1.7.1. + ### 12/15/2017 [ 1.2.0 ] diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 1ac1bcb86f..c9c70b8630 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -289,11 +289,11 @@ typedef enum { /*! @ingroup convolutions * @enum miopenConvolutionMode_t - * Convolution mode selection for convolution layer preference + * Convolution mode selection for convolution layer preference. */ typedef enum { - miopenConvolution = 0, /*!< Convolutions */ - miopenTranspose = 1, /*!< Transpose convolutions */ + miopenConvolution = 0, /*!< Cross-Correlation convolution */ + miopenTranspose = 1, /*!< Transpose convolutions -- deconvolution */ } miopenConvolutionMode_t; /*! @ingroup padding @@ -613,7 +613,7 @@ MIOPEN_EXPORT miopenStatus_t miopenDestroyConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc); /*! @enum miopenConvFwdAlgorithm_t - * Convolutional algorithm mode for forward propagation. + * Convolutional algorithm mode for forward propagation. MIOpen use cross-correlation for its convolution implementation. */ typedef enum { miopenConvolutionFwdAlgoGEMM = 0, /*!< GEMM variant */ @@ -1767,11 +1767,6 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNDescriptor(miopenRNNDescriptor_t rnnDes int* hiddenSize, int* layer); -/* // discuss later -MIOPEN_EXPORT miopenStatus_t miopenGetRNNDescriptor( - miopenRNNDescriptor_t rnnDesc, miopenRNNMode_t* mode, int* seqLength, int* layer, int* bidir -*/ - /*! 
@brief Destroys the tensor descriptor object * * @param rnnDesc RNN tensor descriptor type (input) @@ -1951,6 +1946,8 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNHiddenTensorSize(miopenHandle_t handle, * For bi-directional RNNs the backwards in time direction is numbered as the layer * directly after the forward in time direction. * + * When inputSkip mode is selected there is no input layer matrix operation, therefore + * miopenGetRNNLayerParamSize will return zero for matrices associated with the inputs. * * @param handle MIOpen handle (input) * @param rnnDesc RNN layer descriptor type (input) @@ -1993,6 +1990,8 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamSize(miopenHandle_t handle, * For bi-directional RNNs the backwards in time direction is numbered as the layer * directly after the forward in time direction. * + * When inputSkip mode is selected there is no input layer matrix operation, therefore + * miopenGetRNNLayerBiasSize will return zero for biases associated with the inputs. * * @param handle MIOpen handle (input) * @param rnnDesc RNN layer descriptor type (input) @@ -2044,6 +2043,8 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle, * nullptr then only the paramDesc is populated and returned. The size in bytes of the * layer parameter matrix can be determined by using miopenGetRNNLayerParamSize(). * + * Note: When inputSkip mode is selected there is no input layer matrix operation, therefore + * miopenGetRNNLayerParam will return a error status miopenStatusBadParm. * * @param handle MIOpen handle (input) * @param rnnDesc RNN layer descriptor type (input) @@ -2104,6 +2105,9 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle, * nullptr then only the biasDesc is populated and returned. The size in bytes of the * layer bias can be determined by using miopenGetRNNLayerBiasSize(). * + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenGetRNNLayerBias will return + * a error status miopenStatusBadParm. * * @param handle MIOpen handle (input) * @param rnnDesc RNN layer descriptor type (input) diff --git a/src/ocl/rnnocl.cpp b/src/ocl/rnnocl.cpp index c1edb6f444..6a42a2fcec 100644 --- a/src/ocl/rnnocl.cpp +++ b/src/ocl/rnnocl.cpp @@ -3682,7 +3682,7 @@ void RNNDescriptor::RNNBackwardWeights(Handle& handle, { int in_bias_val = inputMode == miopenRNNskip ? 0 : wei_stride; - hid_shift = li * batch_n * hy_stride; + hid_shift = li * batch_n * hy_stride; wei_shift = (li == 0) ? (wei_shift_bias + in_bias_val) : (wei_shift_bias + in_bias_val + li * 2 * wei_stride); From fad0fd92088d6725c08977d53c81c9435b51abb7 Mon Sep 17 00:00:00 2001 From: Daniel Lowell Date: Fri, 23 Mar 2018 17:15:13 -0500 Subject: [PATCH 02/12] Added in documentation updates for 1.3.0. --- README.md | 44 +++--- doc/src/cache.md | 2 +- doc/src/perfdatabase.md | 15 ++ doc/src/releasenotes.md | 23 ++-- doc/src/rnn.rst | 12 +- include/miopen/miopen.h | 299 +++++++++++++++++++++++++--------------- 6 files changed, 248 insertions(+), 147 deletions(-) diff --git a/README.md b/README.md index 1da29d5bb6..b6ff4241d5 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,27 @@ AMD's library for high peformance machine learning primitives. 
MIOpen supports t * [Boost](http://www.boost.org/) at least version 1.58 * MIOpen uses `boost-system` and `boost-filesystem` packages to enable persistent [kernel cache](https://github.com/ROCmSoftwarePlatform/MIOpen/blob/master/doc/src/cache.md) -Instructions to install the above dependencies are present in this [section](#installing-the-dependencies). +## Installing the dependencies + +The dependencies can be installed with the `install_deps.cmake`, script: + +``` +cmake -P install_deps.cmake +``` + +This will install by default to `/usr/local` but it can be installed in another location with `--prefix` argument: + +``` +cmake -P install_deps.cmake --prefix /some/local/dir +``` + +If Ubuntu v16 is used then the `OpenSSL` and `Boost` packages can also be installed by: +``` +sudo apt-get install libssl-dev +sudo apt-get install libboost-dev +sudo apt-get install libboost-system-dev +sudo apt-get install libboost-filesystem-dev +``` ## Installing MIOpen with pre-built packages @@ -167,24 +187,12 @@ Also, githooks can be installed to format the code per-commit: ./.githooks/install ``` -## Installing the dependencies +## Using docker -The dependencies can be installed with the `install_deps.cmake`, script: +The easiest way is to use docker. You can build the top-level docker file: -``` -cmake -P install_deps.cmake -``` + docker build -t miopen . -This will install by default to `/usr/local` but it can be installed in another location with `--prefix` argument: +Then to enter the developement environment use `docker run`: -``` -cmake -P install_deps.cmake --prefix /some/local/dir -``` - -If Ubuntu v16 is used then the `OpenSSL` and `Boost` packages can also be installed by: -``` -sudo apt-get install libssl-dev -sudo apt-get install libboost-dev -sudo apt-get install libboost-system-dev -sudo apt-get install libboost-filesystem-dev -``` + docker run --device='/dev/kfd' --device='/dev/dri' -v=`pwd`:/data -w /data --group-add video -it miopen diff --git a/doc/src/cache.md b/doc/src/cache.md index 71a7b73a29..4ab864f724 100644 --- a/doc/src/cache.md +++ b/doc/src/cache.md @@ -6,7 +6,7 @@ MIOpen will cache binary kernels to disk, so they don't need to be compiled the Clear the cache --------------- -The cache can be cleared by simply deleting the cache directory(ie `$HOME/.cache/miopen`). This should only be needed for development purposes or to free disk space. The cache does not need to be cleared when upgrading MIOpen. +The cache can be cleared by simply deleting the cache directory (i.e., `$HOME/.cache/miopen`). This should only be needed for development purposes or to free disk space. The cache does not need to be cleared when upgrading MIOpen. Disabling the cache ------------------- diff --git a/doc/src/perfdatabase.md b/doc/src/perfdatabase.md index ce16f88711..11b0ce694c 100644 --- a/doc/src/perfdatabase.md +++ b/doc/src/perfdatabase.md @@ -11,6 +11,9 @@ MIOpen performs Exhaustive Search only if explicitly requested via MIOpen API an The optimized solution found during the successful Search process is written into the PerfDb for future re-use. That is why MIOpen will not Search for optimized solution more than once for a given problem in this mode. +See documentation about miopenFind*() API calls for more info on how Search can be explicitly requested. + + **DB_UPDATE (2)** Similar to NONE, but Search will NOT be skipped if PerfDb contains relevant record. If Search is requested via MIOpen API, then MIOpen will perform the Search and update PerfDb. 
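For reference, the Search mentioned here is requested through the miopenFind*() calls referenced above by passing exhaustiveSearch == 1. The sketch below is a minimal, illustrative example for a forward convolution; it assumes the HIP backend for allocations, that all descriptors and device buffers have already been created, and that the argument order of the workspace query matches the MIOpen 1.x header (check miopen.h if in doubt).

```
#include <miopen/miopen.h>
#include <hip/hip_runtime.h>

// Explicitly request an exhaustive Search for a forward convolution.
// Descriptors and device buffers are assumed to be set up by the caller.
void FindBestFwdAlgo(miopenHandle_t handle,
                     miopenTensorDescriptor_t xDesc, void* x,
                     miopenTensorDescriptor_t wDesc, void* w,
                     miopenConvolutionDescriptor_t convDesc,
                     miopenTensorDescriptor_t yDesc, void* y)
{
    size_t workSpaceSize = 0;
    miopenConvolutionForwardGetWorkSpaceSize(handle, wDesc, xDesc, convDesc, yDesc,
                                             &workSpaceSize);

    void* workSpace = nullptr;
    hipMalloc(&workSpace, workSpaceSize); // HIP backend assumed

    int returnedAlgoCount = 0;
    miopenConvAlgoPerf_t perfResults[4];

    // exhaustiveSearch == 1 explicitly requests the Search; the winning
    // configuration is then written to the PerfDb as described above.
    miopenFindConvolutionForwardAlgorithm(handle, xDesc, x, wDesc, w, convDesc,
                                          yDesc, y, 4, &returnedAlgoCount,
                                          perfResults, workSpace, workSpaceSize, 1);

    hipFree(workSpace);
}
```

The best performing algorithm and its workspace requirement are reported in the returned perfResults entries.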
@@ -34,3 +37,15 @@ Note: This mode is intended for tuning the MIOpen installation. When MIOpen is i **DB_CLEAN (5)** MIOpen removes relevant records from the PerfDb instead of just reading and using those. Search is blocked, even if explicitly requested. + +## MIOPEN_FIND_ENFORCE_SCOPE + +This variable allows to limit the scope of `MIOPEN_FIND_ENFORCE`, so that only forward, backward data or backward weights convolutions will be affected. Both symbolic and numeric values are supported, as shown below. + +**ALL (1)** `MIOPEN_FIND_ENFORCE` affects all convolutions. This is the default. + +**CONV_FWD (2)** `MIOPEN_FIND_ENFORCE` affects only Forward convolutions. + +**CONV_BWD (3)** `MIOPEN_FIND_ENFORCE` affects only Backward Data convolutions. + +**CONV_WRW (3)** `MIOPEN_FIND_ENFORCE` affects only Backward With Regard to Weights (a.k.a WRW) convolutions. \ No newline at end of file diff --git a/doc/src/releasenotes.md b/doc/src/releasenotes.md index 5c06040eff..9c0d6bf705 100644 --- a/doc/src/releasenotes.md +++ b/doc/src/releasenotes.md @@ -5,30 +5,27 @@ Notes: -This release contain performance enhancements and bug fixes to multiple parts of the MIOpen library. -16-bit floating point (fp16) support has been added for most kernels, with the exception of RNN. +- This release adds fp16 support for Inference using CNNs +- Performance improvements for RNNs +- Performance improvements for convolutions using 1x1 filters +- Performance improvement for Batch Normalization +- Bug fixes for various components of MIOpen Changed: -- Added 2 new API for RNNs +- Added 2 new API for RNNs: miopenGetRNNLayerParamOffset and miopenGetRNNLayerBiasOffset - Added in support for uninitialized hidden states and nullptr outputs in RNNs -- Added new convolutions for 1x1 (Jing Zhang, this went in correct?)? - Added support for Set and Scale operations for strided tensors with dimensions 1,2,3,4,5 -- Added the transpose + GEMM algorithm for 1x1 convolution (forward and backward data) -- Added fp16 support for all layers except RNNs -- Added the transpose + GEMM algorithm for 1x1 convolution (forward and backward data) -- Improved over MIOpen layer and operations' performance (I removed the host side claim) -- Improved Batch Normalization performance -- Improved RNN performance +- Added multi-thread and multi-process support for the performance database +- Improved performance for OpTensor +- Fixed bug in convolutions for backward bias - Fixed logic issues in get and set layer functions and related w_supertensor test - Fixed hang in batch norm with batch sizes greater than 256 -- Fixed logic issues in get and set layer functions and related w_supertensor test -- Fixed various RNN bugs - Known Issues: - RNNs do not support fp16 +- Training with CNNs does not support fp16 ### 03/08/2018 [ 1.2.1 ] diff --git a/doc/src/rnn.rst b/doc/src/rnn.rst index b57fc80c18..3a53861c97 100644 --- a/doc/src/rnn.rst +++ b/doc/src/rnn.rst @@ -51,7 +51,6 @@ miopenGetRNNDescriptor .. doxygenfunction:: miopenGetRNNDescriptor - miopenDestroyRNNDescriptor -------------------------- @@ -135,6 +134,17 @@ miopenSetRNNLayerBias .. doxygenfunction:: miopenSetRNNLayerBias +miopenGetRNNLayerParamOffset +---------------------------- + +.. doxygenfunction:: miopenGetRNNLayerParamOffset + + +miopenGetRNNLayerBiasOffset +--------------------------- + +.. 
doxygenfunction:: miopenGetRNNLayerBiasOffset + miopenRNNForwardTraining ------------------------ diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index c9c70b8630..bfe9054236 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -698,10 +698,10 @@ miopenConvolutionForwardGetWorkSpaceSize(miopenHandle_t handle, * to execute this function, miopenConvolutionForwardGetWorkSpaceSize() must be * run to determine the required memory for this search. * - * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If a + * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If a * configuration match is not found, a default configuration will be returned. * - * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. If + * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. If * a match is not found, an exhaustive search is performed by running individual algorithms. * * @param handle MIOpen handle (input) @@ -831,10 +831,10 @@ miopenConvolutionBackwardDataGetWorkSpaceSize(miopenHandle_t handle, * execute this function, miopenConvolutionBackwardsDataGetWorkSpaceSize() must be run to determine * the required memory for this search. * - * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If a + * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If a * configuration match is not found, a default configuration will be returned. * - * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. If + * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. If * a match is not found, an exhaustive search is performed by running individual algorithms. * * @param handle MIOpen handle (input) @@ -944,10 +944,10 @@ miopenConvolutionBackwardWeightsGetWorkSpaceSize(miopenHandle_t handle, * execute this function, miopenConvolutionBackwardsWeightsGetWorkSpaceSize() must be run to * determine the required memory for this search. * - * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If a + * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If a * configuration match is not found, a default configuration will be returned. * - * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. If + * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. If * a match is not found, an exhaustive search is performed by running individual algorithms. * * @param handle MIOpen handle (input) @@ -1357,6 +1357,7 @@ MIOPEN_EXPORT miopenStatus_t miopenDestroyLRNDescriptor(miopenLRNDescriptor_t lr * * This function takes the input tensor descriptor and outputs a derived tensor for the * normalization scale (gamma) and shift (beta) tensors. + * * For an input tensor NCHW and spatial mode, the output derived tensor is 1C11, while for * per-activation the derived tensor is 1CHW. * @@ -1374,8 +1375,10 @@ MIOPEN_EXPORT miopenStatus_t miopenDeriveBNTensorDescriptor(miopenTensorDescript * Batch normalization pass for forward training pass. * Takes in batch normalization mode bn_mode and input tensor x, output tensor y, bnBias and bnScale * with their descriptor. 
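As a rough orientation for the call documented here, the sketch below runs spatial batch normalization in forward training mode on fp32 NCHW tensors. It is a minimal sketch, assuming the argument order of the MIOpen 1.x header; it passes null pointers for the saved mean and inverse variance, which, as noted just below, is permitted.

```
#include <miopen/miopen.h>

// Spatial batch normalization, forward training, fp32 NCHW. The handle,
// xDesc/x, yDesc/y and the device buffers for scale, bias and the running
// statistics are assumed to be set up by the caller.
void BNFwdTrainSpatial(miopenHandle_t handle,
                       miopenTensorDescriptor_t xDesc, const void* x,
                       miopenTensorDescriptor_t yDesc, void* y,
                       void* bnScale, void* bnBias,
                       void* runningMean, void* runningVariance)
{
    miopenTensorDescriptor_t bnDesc;
    miopenCreateTensorDescriptor(&bnDesc);
    // Derives a 1xCx1x1 descriptor for the scale/bias/mean/variance tensors.
    miopenDeriveBNTensorDescriptor(bnDesc, xDesc, miopenBNSpatial);

    float alpha = 1.0f, beta = 0.0f;
    miopenBatchNormalizationForwardTraining(handle, miopenBNSpatial, &alpha, &beta,
                                            xDesc, x, yDesc, y,
                                            bnDesc, bnScale, bnBias,
                                            0.1,      // exponential averaging factor
                                            runningMean, runningVariance,
                                            1e-5,     // epsilon
                                            nullptr,  // resultSaveMean not kept
                                            nullptr); // resultSaveInvVariance not kept

    miopenDestroyTensorDescriptor(bnDesc);
}
```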
+ * * If either resultSaveMean, or resultSaveInvVariance are null pointers then the values for the mean * and inverse variance will not be used. + * * Likewise, if either resultRunningMean, or resultRunningVariance are null pointers then the values * for the running mean and variance will not be saved. * Running averages and variances are scaled using an exponential averaging factor: \f[ @@ -1429,6 +1432,7 @@ miopenBatchNormalizationForwardTraining(miopenHandle_t handle, * Batch normalization pass for forward inference pass. * Takes in batch normalization mode bn_mode and input tensor x, output tensor y, bnBias and bnScale * with their descriptor. + * * If either estimatedMean, or estimatedVariance are null pointers then the values for the mean and * variance will not be used. * @@ -1469,9 +1473,11 @@ miopenBatchNormalizationForwardInference(miopenHandle_t handle, * * Batch normalization pass for backwards propagation training pass. * The method for backwards propagation batch normalization. + * * Takes in batch normalization mode bn_mode and input tensor data x, input activation tensor dy, * output tensor dx, the learned tensors resultBNBiasDiff and resultBNScaleDiff with their * descriptor. + * * If BOTH savedMean, and savedVariance are not null pointers then the method will use the saved * mean and variance calculated by the forward training phase. * @@ -1694,8 +1700,8 @@ MIOPEN_EXPORT miopenStatus_t miopenSoftmaxBackward(miopenHandle_t handle, * RNN mode selection for rnn layer preference */ typedef enum { - miopenRNNRELU = 0, /*!< RNN ReLU squash */ - miopenRNNTANH = 1, /*!< RNN tanh squash */ + miopenRNNRELU = 0, /*!< RNN ReLU activation */ + miopenRNNTANH = 1, /*!< RNN tanh activation */ miopenLSTM = 2, /*!< LSTM */ miopenGRU = 3, /*!< GRU */ } miopenRNNMode_t; @@ -1786,7 +1792,7 @@ MIOPEN_EXPORT miopenStatus_t miopenDestroyRNNDescriptor(miopenRNNDescriptor_t rn * @param rnnMode RNN model type (input) * @param biasMode RNN bias included (input) * @param algo RNN algorithm selected (input) - * @param dataType fp32 or fp16 datatype mode, only fp 16 currently supported for RNNs (input) + * @param dataType Only fp32 currently supported for RNNs (input) * @return miopenStatus_t */ MIOPEN_EXPORT miopenStatus_t miopenSetRNNDescriptor(miopenRNNDescriptor_t rnnDesc, @@ -1930,25 +1936,28 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNHiddenTensorSize(miopenHandle_t handle, * For miopenLSTM paramID 0 to 3 refer to the weight matrices associated * with the input GEMM, 4-7 are associated with matrices associated with the * hidden state GEMM. - * ParamID 0 and 4 are for the input gate operations. - * ParamID 1 and 5 are for the forget gate operations. - * ParamID 2 and 6 are for the memory gate operations. - * ParamID 3 and 7 are for the output gate operations. * + * * paramID 0 and 4 are for the input gate operations. + * + * * paramID 1 and 5 are for the forget gate operations. + * + * * paramID 2 and 6 are for the memory gate operations. + * + * * paramID 3 and 7 are for the output gate operations. * - * For miopenGRU paramID 0 to 2 refer to the the weight matrices associated - * with the input GEMM, while 5 through 6 are associated with the hidden state + * For miopenGRU paramID 0 to 2 refer to the weight matrix offset associated + * with the input GEMM, while 3 through 5 are associated with the hidden state * GEMM. - * ParamID 0 and 4 are for the reset gate operations. - * ParamID 1 and 5 are for the update gate operations. - * ParamID 2 and 6 are for the memory gate operations. 
+ * + * * paramID 0 and 3 are for the reset gate operations. + * + * * paramID 1 and 4 are for the update gate operations. + * + * * paramID 2 and 5 are for the memory gate operations. * * For bi-directional RNNs the backwards in time direction is numbered as the layer * directly after the forward in time direction. * - * When inputSkip mode is selected there is no input layer matrix operation, therefore - * miopenGetRNNLayerParamSize will return zero for matrices associated with the inputs. - * * @param handle MIOpen handle (input) * @param rnnDesc RNN layer descriptor type (input) * @param layer The layer number in the RNN stack (input) @@ -1970,29 +1979,30 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamSize(miopenHandle_t handle, * weight matrix associated with the in input GEMM, while biasID == 1 retrieves * the bias associated with the hidden state GEMM. * - * For miopenLSTM paramID 0 to 3 refer to the biases associated + * For miopenLSTM biasID 0 to 3 refer to the biases associated * with the input GEMM, 4-7 are associated with biases associated with the * hidden state GEMM. - * biasID 0 and 4 are for the input gate operations. - * biasID 1 and 5 are for the forget gate operations. - * biasID 2 and 6 are for the memory gate operations. - * biasID 3 and 7 are for the output gate operations. * + * * biasID 0 and 4 are for the input gate operations. * - * For miopenGRU biasID 0 to 2 refer to the biases associated - * with the input GEMM, while 5 through 6 are associated with the hidden state - * GEMM. - * biasID 0 and 4 are for the reset gate operations. - * biasID 1 and 5 are for the update gate operations. - * biasID 2 and 6 are for the memory gate operations. + * * biasID 1 and 5 are for the forget gate operations. + * + * * biasID 2 and 6 are for the memory gate operations. * + * * biasID 3 and 7 are for the output gate operations. + * + * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, + * while 3 through 5 are associated with the hidden state GEMM. + * + * * biasID 0 and 3 are for the reset gate operations. + * + * * biasID 1 and 4 are for the update gate operations. + * + * * biasID 2 and 5 are for the memory gate operations. * * For bi-directional RNNs the backwards in time direction is numbered as the layer * directly after the forward in time direction. * - * When inputSkip mode is selected there is no input layer matrix operation, therefore - * miopenGetRNNLayerBiasSize will return zero for biases associated with the inputs. - * * @param handle MIOpen handle (input) * @param rnnDesc RNN layer descriptor type (input) * @param layer The layer number in the RNN stack (input) @@ -2018,18 +2028,24 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle, * For miopenLSTM paramID 0 to 3 refer to the weight matrices associated * with the input GEMM, 4-7 are associated with matrices associated with the * hidden state GEMM. - * ParamID 0 and 4 are for the input gate operations. - * ParamID 1 and 5 are for the forget gate operations. - * ParamID 2 and 6 are for the memory gate operations. - * ParamID 3 and 7 are for the output gate operations. * + * * paramID 0 and 4 are for the input gate operations. + * + * * paramID 1 and 5 are for the forget gate operations. + * + * * paramID 2 and 6 are for the memory gate operations. + * + * * paramID 3 and 7 are for the output gate operations. 
* - * For miopenGRU paramID 0 to 2 refer to the weight matrices associated - * with the input GEMM, while 5 through 6 are associated with the hidden state + * For miopenGRU paramID 0 to 2 refer to the weight matrix offset associated + * with the input GEMM, while 3 through 5 are associated with the hidden state * GEMM. - * ParamID 0 and 4 are for the reset gate operations. - * ParamID 1 and 5 are for the update gate operations. - * ParamID 2 and 6 are for the memory gate operations. + * + * * paramID 0 and 3 are for the reset gate operations. + * + * * paramID 1 and 4 are for the update gate operations. + * + * * paramID 2 and 5 are for the memory gate operations. * * For bi-directional RNNs the backwards in time direction is numbered as the layer * directly after the forward in time direction. @@ -2043,8 +2059,9 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle, * nullptr then only the paramDesc is populated and returned. The size in bytes of the * layer parameter matrix can be determined by using miopenGetRNNLayerParamSize(). * - * Note: When inputSkip mode is selected there is no input layer matrix operation, therefore - * miopenGetRNNLayerParam will return a error status miopenStatusBadParm. + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenGetRNNLayerParam() will return + * a error status miopenStatusBadParm for input paramID associated with the input GEMM. * * @param handle MIOpen handle (input) * @param rnnDesc RNN layer descriptor type (input) @@ -2076,22 +2093,27 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle, * bias associated with the in input GEMM, while biasID == 1 retrieves * the bias associated with the hidden state GEMM. * - * For miopenLSTM paramID 0 to 3 refer to the biases associated + * For miopenLSTM biasID 0 to 3 refer to the biases associated * with the input GEMM, 4-7 are associated with biases associated with the * hidden state GEMM. - * biasID 0 and 4 are for the input gate operations. - * biasID 1 and 5 are for the forget gate operations. - * biasID 2 and 6 are for the memory gate operations. - * biasID 3 and 7 are for the output gate operations. * + * * biasID 0 and 4 are for the input gate operations. + * + * * biasID 1 and 5 are for the forget gate operations. + * + * * biasID 2 and 6 are for the memory gate operations. + * + * * biasID 3 and 7 are for the output gate operations. * - * For miopenGRU biasID 0 to 2 refer to the biases associated - * with the input GEMM, while 5 through 6 are associated with the hidden state - * GEMM. - * biasID 0 and 4 are for the reset gate operations. - * biasID 1 and 5 are for the update gate operations. - * biasID 2 and 6 are for the memory gate operations. * + * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, + * while 3 through 5 are associated with the hidden state GEMM. + * + * * biasID 0 and 3 are for the reset gate operations. + * + * * biasID 1 and 4 are for the update gate operations. + * + * * biasID 2 and 5 are for the memory gate operations. * * For bi-directional RNNs the backwards in time direction is numbered as the layer * directly after the forward in time direction. @@ -2106,8 +2128,8 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle, * layer bias can be determined by using miopenGetRNNLayerBiasSize(). 
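A minimal retrieval sketch is shown below. It assumes the parameter order documented here, the HIP backend for allocation, and an abbreviated prototype for miopenGetRNNLayerBiasSize() taken from the header rather than from this patch.

```
#include <miopen/miopen.h>
#include <hip/hip_runtime.h>

// Read back the hidden-state bias (biasID == 1 for miopenRNNRELU/miopenRNNTANH)
// of layer 0. rnnDesc, xDesc, wDesc and the packed device weight buffer w are
// assumed to have been created by the caller.
void ReadHiddenBias(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc,
                    miopenTensorDescriptor_t xDesc,
                    miopenTensorDescriptor_t wDesc, const void* w)
{
    const int layer = 0, biasID = 1;

    size_t biasBytes = 0;
    miopenGetRNNLayerBiasSize(handle, rnnDesc, layer, biasID, &biasBytes);

    miopenTensorDescriptor_t biasDesc;
    miopenCreateTensorDescriptor(&biasDesc);

    void* layerBias = nullptr;
    hipMalloc(&layerBias, biasBytes); // memory to receive the bias values

    miopenGetRNNLayerBias(handle, rnnDesc, layer, xDesc, wDesc, w,
                          biasID, biasDesc, layerBias);

    // ... use biasDesc / layerBias ...
    hipFree(layerBias);
    miopenDestroyTensorDescriptor(biasDesc);
}
```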
* * Note: When inputSkip mode is selected there is no input layer matrix operation, - * and therefore no associated memory. In this case miopenGetRNNLayerBias will return - * a error status miopenStatusBadParm. + * and therefore no associated memory. In this case miopenGetRNNLayerBias() will return + * a error status miopenStatusBadParm for input biasID associated with the input GEMM. * * @param handle MIOpen handle (input) * @param rnnDesc RNN layer descriptor type (input) @@ -2142,18 +2164,24 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBias(miopenHandle_t handle, * For miopenLSTM paramID 0 to 3 refer to the weight matrix offsets associated * with the input GEMM, 4-7 are associated with matrix offset associated with the * hidden state GEMM. - * ParamID 0 and 4 are for the input gate operations. - * ParamID 1 and 5 are for the forget gate operations. - * ParamID 2 and 6 are for the memory gate operations. - * ParamID 3 and 7 are for the output gate operations. * + * * paramID 0 and 4 are for the input gate operations. + * + * * paramID 1 and 5 are for the forget gate operations. + * + * * paramID 2 and 6 are for the memory gate operations. + * + * * paramID 3 and 7 are for the output gate operations. * * For miopenGRU paramID 0 to 2 refer to the weight matrix offset associated - * with the input GEMM, while 5 through 6 are associated with the hidden state + * with the input GEMM, while 3 through 5 are associated with the hidden state * GEMM. - * ParamID 0 and 4 are for the reset gate operations. - * ParamID 1 and 5 are for the update gate operations. - * ParamID 2 and 6 are for the memory gate operations. + * + * * paramID 0 and 3 are for the reset gate operations. + * + * * paramID 1 and 4 are for the update gate operations. + * + * * paramID 2 and 5 are for the memory gate operations. * * For bi-directional RNNs the backwards in time direction is numbered as the layer * directly after the forward in time direction. @@ -2165,6 +2193,10 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBias(miopenHandle_t handle, * The argument layerParamOffset should either be nullptr, or an address to place the * offset. If layerParamOffset is nullptr then only the paramDesc is populated and returned. * + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenGetRNNLayerParamOffset() will return + * a error status miopenStatusBadParm for input paramID associated with the input GEMM. + * * * @param rnnDesc RNN layer descriptor type (input) * @param layer The layer number in the RNN stack (input) @@ -2189,21 +2221,25 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamOffset(miopenRNNDescriptor_t * bias associated with the in input GEMM, while biasID == 1 retrieves * the weight matrix associated with the hidden state GEMM. * - * For miopenLSTM paramID 0 to 3 refer to the bias offset associated + * For miopenLSTM biasID 0 to 3 refer to the bias offset associated * with the input GEMM, 4-7 are the bias offsets associated with the hidden state GEMM. - * biasID 0 and 4 are for the input gate operations. - * biasID 1 and 5 are for the forget gate operations. - * biasID 2 and 6 are for the memory gate operations. - * biasID 3 and 7 are for the output gate operations. * + * * biasID 0 and 4 are for the input gate operations. * - * For miopenGRU biasID 0 to 2 refer to the bias offsets associated - * with the input GEMM, while 5 through 6 are associated with the hidden state - * GEMM. 
- * biasID 0 and 4 are for the reset gate operations. - * biasID 1 and 5 are for the update gate operations. - * biasID 2 and 6 are for the memory gate operations. + * * biasID 1 and 5 are for the forget gate operations. + * + * * biasID 2 and 6 are for the memory gate operations. + * + * * biasID 3 and 7 are for the output gate operations. * + * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, + * while 3 through 5 are associated with the hidden state GEMM. + * + * * biasID 0 and 3 are for the reset gate operations. + * + * * biasID 1 and 4 are for the update gate operations. + * + * * biasID 2 and 5 are for the memory gate operations. * * For bi-directional RNNs the backwards in time direction is numbered as the layer * directly after the forward in time direction. @@ -2215,6 +2251,9 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamOffset(miopenRNNDescriptor_t * The argument layerBiasOffset should either be nullptr, or point to an output address. * If layerBias is nullptr then only the biasDesc is populated and returned. * + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenGetRNNLayerBiasOffset() will return + * a error status miopenStatusBadParm for input biasID associated with the input GEMM. * * @param rnnDesc RNN layer descriptor type (input) * @param layer The layer number in the RNN stack (input) @@ -2243,18 +2282,25 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasOffset(miopenRNNDescriptor_t r * For miopenLSTM paramID 0 to 3 refer to the weight matrices associated * with the input GEMM, 4-7 are associated with matrices associated with the * hidden state GEMM. - * ParamID 0 and 4 are for the input gate operations. - * ParamID 1 and 5 are for the forget gate operations. - * ParamID 2 and 6 are for the memory gate operations. - * ParamID 3 and 7 are for the output gate operations. * + * * paramID 0 and 4 are for the input gate operations. + * + * * paramID 1 and 5 are for the forget gate operations. + * + * * paramID 2 and 6 are for the memory gate operations. + * + * * paramID 3 and 7 are for the output gate operations. * - * For miopenGRU paramID 0 to 2 refer to the weight matrices associated - * with the input GEMM, while 5 through 6 are associated with the hidden state + * + * For miopenGRU paramID 0 to 2 refer to the weight matrix offset associated + * with the input GEMM, while 3 through 5 are associated with the hidden state * GEMM. - * ParamID 0 and 4 are for the reset gate operations. - * ParamID 1 and 5 are for the update gate operations. - * ParamID 2 and 6 are for the memory gate operations. + * + * * paramID 0 and 3 are for the reset gate operations. + * + * * paramID 1 and 4 are for the update gate operations. + * + * * paramID 2 and 5 are for the memory gate operations. * * For bi-directional RNNs the backwards in time direction is numbered as the layer * directly after the forward in time direction. @@ -2262,6 +2308,9 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasOffset(miopenRNNDescriptor_t r * The input argument paramDesc is a previously populated tensor descriptor typically * by first calling miopenGetRNNLayerParam(). * + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenSetRNNLayerParam() will return + * a error status miopenStatusBadParm for input paramID associated with the input GEMM. 
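A possible calling sequence, sketched under the assumption that the parameter lists documented here reflect the final argument order, is to populate paramDesc with miopenGetRNNLayerParam() and then write the new values with miopenSetRNNLayerParam().

```
#include <miopen/miopen.h>

// Overwrite the input-GEMM weight matrix (paramID == 0) of layer 0 with new
// values held in newWeights (device memory). rnnDesc, xDesc, wDesc and the
// packed weight buffer w are assumed to exist; the RNN must not use the
// miopenRNNskip input mode, per the note above.
void WriteInputWeights(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc,
                       miopenTensorDescriptor_t xDesc,
                       miopenTensorDescriptor_t wDesc, void* w,
                       const void* newWeights)
{
    const int layer = 0, paramID = 0;

    miopenTensorDescriptor_t paramDesc;
    miopenCreateTensorDescriptor(&paramDesc);

    // Passing nullptr for layerParam only populates paramDesc with the matrix shape.
    miopenGetRNNLayerParam(handle, rnnDesc, layer, xDesc, wDesc, w,
                           paramID, paramDesc, nullptr);

    // newWeights must match the size reported by miopenGetRNNLayerParamSize().
    miopenSetRNNLayerParam(handle, rnnDesc, layer, xDesc, wDesc, w,
                           paramID, paramDesc, newWeights);

    miopenDestroyTensorDescriptor(paramDesc);
}
```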
* * @param handle MIOpen handle (input) * @param rnnDesc RNN layer descriptor type (input) @@ -2292,22 +2341,27 @@ MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerParam(miopenHandle_t handle, * weight matrix associated with the in input GEMM, while biasID == 1 retrieves * the bias associated with the hidden state GEMM. * - * For miopenLSTM paramID 0 to 3 refer to the biases associated + * For miopenLSTM biasID 0 to 3 refer to the biases associated * with the input GEMM, 4-7 are associated with the biases associated with the * hidden state GEMM. - * biasID 0 and 4 are for the input gate operations. - * biasID 1 and 5 are for the forget gate operations. - * biasID 2 and 6 are for the memory gate operations. - * biasID 3 and 7 are for the output gate operations. * + * * biasID 0 and 4 are for the input gate operations. * - * For miopenGRU biasID 0 to 2 refer to the biases associated - * with the input GEMM, while 5 through 6 are associated with the hidden state - * GEMM. - * biasID 0 and 4 are for the reset gate operations. - * biasID 1 and 5 are for the update gate operations. - * biasID 2 and 6 are for the memory gate operations. + * * biasID 1 and 5 are for the forget gate operations. * + * * biasID 2 and 6 are for the memory gate operations. + * + * * biasID 3 and 7 are for the output gate operations. + * + * + * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, + * while 3 through 5 are associated with the hidden state GEMM. + * + * * biasID 0 and 3 are for the reset gate operations. + * + * * biasID 1 and 4 are for the update gate operations. + * + * * biasID 2 and 5 are for the memory gate operations. * * For bi-directional RNNs the backwards in time direction is numbered as the layer * directly after the forward in time direction. @@ -2315,6 +2369,9 @@ MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerParam(miopenHandle_t handle, * The input argument biasDesc is a previously populated tensor descriptor typically * by first calling miopenGetRNNLayeBias(). * + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenSetRNNLayerBias will return + * a error status miopenStatusBadParm for input biasID associated with the input GEMM. * * @param handle MIOpen handle (input) * @param rnnDesc RNN layer descriptor type (input) @@ -2355,13 +2412,15 @@ MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerBias(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hx Pointer to the hidden layer input tensor (input) + * @param hx Pointer to the hidden layer input tensor. If hx is NULL, + * then the initial hidden state will be zero initialized. (input) * @param cxDesc A cell tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param cx Pointer to the cell layer input tensor (input) + * @param cx Pointer to the cell layer input tensor. If cx is NULL, + * then the initial cell state will be zero initialized. 
(input) * @param wDesc A weights tensor descriptor (input) * @param w Pointer to input weights tensor (input) * @param yDesc An array of fully packed tensor descriptors associated @@ -2377,13 +2436,15 @@ MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerBias(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hy Pointer to the hidden layer output tensor (output) + * @param hy Pointer to the hidden layer output tensor. If hy is NULL, + * then the final hidden state will not be saved. (output) * @param cyDesc A cell tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param cy Pointer to the cell layer output tensor (output) + * @param cy Pointer to the cell layer output tensor. If hy is NULL, + * then the final cell state will not be saved. (output) * @param workSpace Pointer to memory allocated for forward training (input) * @param workSpaceNumBytes Number of allocated bytes in memory for the workspace (input) * @param reserveSpace Pointer to memory allocated for random states (input / output) @@ -2440,7 +2501,8 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNForwardTraining(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param dcy Pointer to the cell layer input tensor (input) + * @param dcy Pointer to the cell layer input tensor. If dcy is NULL, + * then the initial delta cell state will be zero initialized. (input) * @param wDesc A weights tensor descriptor (input) * @param w Pointer to input weights tensor (input) * @param hxDesc An input hidden tensor descriptor that has as its first dimension @@ -2448,13 +2510,15 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNForwardTraining(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hx Pointer to output tensor (input) + * @param hx Pointer to the hidden layer input tensor. If hx is NULL, + * then the initial hidden state will be zero initialized. (input) * @param cxDesc A input cell tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param cx Pointer to the hidden layer output tensor (input) + * @param cx Pointer to the hidden layer input tensor. If cx is NULL, + * then the initial cell state will be zero initialized. (input) * @param dxDesc An array of tensor descriptors. These are the * input descriptors to each time step. 
The first dimension of each descriptor is the * batch size and may decrease from element n to element n+1 and not increase in size. @@ -2466,13 +2530,15 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNForwardTraining(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param dhx Pointer to the cell layer output tensor (output) + * @param dhx Pointer to the delta hidden layer output tensor. If dhx is NULL + * the hidden gradient will not ouput. (output) * @param dcxDesc A tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param dcx Pointer to the cell layer output tensor (output) + * @param dcx Pointer to the cell layer output tensor. If dcx is NULL + * the cell gradient will not ouput. (output) * @param workSpace Pointer to memory allocated for forward training (input) * @param workSpaceNumBytes Number of allocated bytes in memory for the workspace (input) * @param reserveSpace Pointer to memory allocated for random states (input / output) @@ -2525,7 +2591,8 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardData(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hx Pointer to the hidden layer input tensor (input) + * @param hx Pointer to the hidden layer input tensor. If hx is NULL, + * then the initial hidden state will be zero initialized. (input) * @param yDesc An array of fully packed tensor descriptors associated * with the output from each time step. The first dimension of the tensor descriptors * must equal the first dimension of the first descriptor (batch size) in the xDesc @@ -2533,9 +2600,9 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardData(miopenHandle_t handle, * depends on the direction mode selected. If the direction mode is unidirectional, * the second dimension is the hiddenSize. If direction mode is bidirectional * the second dimension is twice the hiddenSize. (input) - * @param y Pointer to the cell layer input tensor (input) + * @param y Pointer to the output tensor (input) * @param dwDesc A weights tensor descriptor (input) - * @param dw Pointer to input weights tensor (output) + * @param dw Pointer to input weights tensor (input / output) * @param workSpace Pointer to memory allocated for forward training (input) * @param workSpaceNumBytes Number of allocated bytes in memory for the workspace (input) * @param reserveSpace Pointer to memory allocated for random states (input) @@ -2576,13 +2643,15 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardWeights(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hx Pointer to the hidden layer input tensor (input) + * @param hx Pointer to the hidden layer input tensor. 
If hx is NULL, + * then the initial hidden state will be zero initialized. (input) * @param cxDesc A cell tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param cx Pointer to the cell layer input tensor (input) + * @param cx Pointer to the cell layer input tensor. If cx is NULL, + * then the initial cell state will be zero initialized. (input) * @param wDesc A weights tensor descriptor (input) * @param w Pointer to input weights tensor (input) * @param yDesc An array of fully packed tensor descriptors associated @@ -2598,13 +2667,15 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardWeights(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hy Pointer to the hidden layer output tensor (output) + * @param hy Pointer to the hidden layer output tensor. If hy is NULL, + * then the final hidden state will not be saved. (output) * @param cyDesc A output cell tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param cy Pointer to the cell layer output tensor (output) + * @param cy Pointer to the cell layer output tensor. If cy is NULL, + * then the final cell state will not be saved. (output) * @param workSpace Pointer to memory allocated for forward training (input) * @param workSpaceNumBytes Number of allocated bytes in memory for the workspace (input) * @return miopenStatus_t From 91febb1c4844fc189ef2b931cbd606b61a60ccb5 Mon Sep 17 00:00:00 2001 From: Daniel Lowell Date: Sun, 25 Mar 2018 20:27:40 -0500 Subject: [PATCH 03/12] Formatting --- include/miopen/miopen.h | 109 +++++++++++++++++++++------------------- test/rnn_vanilla.cpp | 2 +- 2 files changed, 59 insertions(+), 52 deletions(-) diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index bfe9054236..1954adf61c 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -613,7 +613,8 @@ MIOPEN_EXPORT miopenStatus_t miopenDestroyConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc); /*! @enum miopenConvFwdAlgorithm_t - * Convolutional algorithm mode for forward propagation. MIOpen use cross-correlation for its convolution implementation. + * Convolutional algorithm mode for forward propagation. MIOpen use cross-correlation for its + * convolution implementation. */ typedef enum { miopenConvolutionFwdAlgoGEMM = 0, /*!< GEMM variant */ @@ -698,10 +699,12 @@ miopenConvolutionForwardGetWorkSpaceSize(miopenHandle_t handle, * to execute this function, miopenConvolutionForwardGetWorkSpaceSize() must be * run to determine the required memory for this search. * - * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If a + * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. 
If + * a * configuration match is not found, a default configuration will be returned. * - * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. If + * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. + * If * a match is not found, an exhaustive search is performed by running individual algorithms. * * @param handle MIOpen handle (input) @@ -831,10 +834,12 @@ miopenConvolutionBackwardDataGetWorkSpaceSize(miopenHandle_t handle, * execute this function, miopenConvolutionBackwardsDataGetWorkSpaceSize() must be run to determine * the required memory for this search. * - * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If a + * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If + * a * configuration match is not found, a default configuration will be returned. * - * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. If + * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. + * If * a match is not found, an exhaustive search is performed by running individual algorithms. * * @param handle MIOpen handle (input) @@ -944,10 +949,12 @@ miopenConvolutionBackwardWeightsGetWorkSpaceSize(miopenHandle_t handle, * execute this function, miopenConvolutionBackwardsWeightsGetWorkSpaceSize() must be run to * determine the required memory for this search. * - * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If a + * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If + * a * configuration match is not found, a default configuration will be returned. * - * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. If + * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. + * If * a match is not found, an exhaustive search is performed by running individual algorithms. * * @param handle MIOpen handle (input) @@ -1938,11 +1945,11 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNHiddenTensorSize(miopenHandle_t handle, * hidden state GEMM. * * * paramID 0 and 4 are for the input gate operations. - * + * * * paramID 1 and 5 are for the forget gate operations. - * + * * * paramID 2 and 6 are for the memory gate operations. - * + * * * paramID 3 and 7 are for the output gate operations. * * For miopenGRU paramID 0 to 2 refer to the weight matrix offset associated @@ -1991,11 +1998,11 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamSize(miopenHandle_t handle, * * * biasID 3 and 7 are for the output gate operations. * - * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, + * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, * while 3 through 5 are associated with the hidden state GEMM. * * * biasID 0 and 3 are for the reset gate operations. - * + * * * biasID 1 and 4 are for the update gate operations. * * * biasID 2 and 5 are for the memory gate operations. @@ -2030,11 +2037,11 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle, * hidden state GEMM. * * * paramID 0 and 4 are for the input gate operations. - * + * * * paramID 1 and 5 are for the forget gate operations. - * + * * * paramID 2 and 6 are for the memory gate operations. 
- * + * * * paramID 3 and 7 are for the output gate operations. * * For miopenGRU paramID 0 to 2 refer to the weight matrix offset associated @@ -2059,8 +2066,8 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle, * nullptr then only the paramDesc is populated and returned. The size in bytes of the * layer parameter matrix can be determined by using miopenGetRNNLayerParamSize(). * - * Note: When inputSkip mode is selected there is no input layer matrix operation, - * and therefore no associated memory. In this case miopenGetRNNLayerParam() will return + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenGetRNNLayerParam() will return * a error status miopenStatusBadParm for input paramID associated with the input GEMM. * * @param handle MIOpen handle (input) @@ -2106,11 +2113,11 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle, * * biasID 3 and 7 are for the output gate operations. * * - * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, + * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, * while 3 through 5 are associated with the hidden state GEMM. * * * biasID 0 and 3 are for the reset gate operations. - * + * * * biasID 1 and 4 are for the update gate operations. * * * biasID 2 and 5 are for the memory gate operations. @@ -2127,8 +2134,8 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle, * nullptr then only the biasDesc is populated and returned. The size in bytes of the * layer bias can be determined by using miopenGetRNNLayerBiasSize(). * - * Note: When inputSkip mode is selected there is no input layer matrix operation, - * and therefore no associated memory. In this case miopenGetRNNLayerBias() will return + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenGetRNNLayerBias() will return * a error status miopenStatusBadParm for input biasID associated with the input GEMM. * * @param handle MIOpen handle (input) @@ -2166,11 +2173,11 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBias(miopenHandle_t handle, * hidden state GEMM. * * * paramID 0 and 4 are for the input gate operations. - * + * * * paramID 1 and 5 are for the forget gate operations. - * + * * * paramID 2 and 6 are for the memory gate operations. - * + * * * paramID 3 and 7 are for the output gate operations. * * For miopenGRU paramID 0 to 2 refer to the weight matrix offset associated @@ -2193,8 +2200,8 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBias(miopenHandle_t handle, * The argument layerParamOffset should either be nullptr, or an address to place the * offset. If layerParamOffset is nullptr then only the paramDesc is populated and returned. * - * Note: When inputSkip mode is selected there is no input layer matrix operation, - * and therefore no associated memory. In this case miopenGetRNNLayerParamOffset() will return + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenGetRNNLayerParamOffset() will return * a error status miopenStatusBadParm for input paramID associated with the input GEMM. * * @@ -2232,11 +2239,11 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamOffset(miopenRNNDescriptor_t * * * biasID 3 and 7 are for the output gate operations. 
* - * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, + * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, * while 3 through 5 are associated with the hidden state GEMM. * * * biasID 0 and 3 are for the reset gate operations. - * + * * * biasID 1 and 4 are for the update gate operations. * * * biasID 2 and 5 are for the memory gate operations. @@ -2251,8 +2258,8 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamOffset(miopenRNNDescriptor_t * The argument layerBiasOffset should either be nullptr, or point to an output address. * If layerBias is nullptr then only the biasDesc is populated and returned. * - * Note: When inputSkip mode is selected there is no input layer matrix operation, - * and therefore no associated memory. In this case miopenGetRNNLayerBiasOffset() will return + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenGetRNNLayerBiasOffset() will return * a error status miopenStatusBadParm for input biasID associated with the input GEMM. * * @param rnnDesc RNN layer descriptor type (input) @@ -2284,11 +2291,11 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasOffset(miopenRNNDescriptor_t r * hidden state GEMM. * * * paramID 0 and 4 are for the input gate operations. - * + * * * paramID 1 and 5 are for the forget gate operations. - * + * * * paramID 2 and 6 are for the memory gate operations. - * + * * * paramID 3 and 7 are for the output gate operations. * * @@ -2308,8 +2315,8 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasOffset(miopenRNNDescriptor_t r * The input argument paramDesc is a previously populated tensor descriptor typically * by first calling miopenGetRNNLayerParam(). * - * Note: When inputSkip mode is selected there is no input layer matrix operation, - * and therefore no associated memory. In this case miopenSetRNNLayerParam() will return + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenSetRNNLayerParam() will return * a error status miopenStatusBadParm for input paramID associated with the input GEMM. * * @param handle MIOpen handle (input) @@ -2354,11 +2361,11 @@ MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerParam(miopenHandle_t handle, * * biasID 3 and 7 are for the output gate operations. * * - * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, + * For miopenGRU biasID 0 to 2 refer to the biases associated with the input GEMM, * while 3 through 5 are associated with the hidden state GEMM. * * * biasID 0 and 3 are for the reset gate operations. - * + * * * biasID 1 and 4 are for the update gate operations. * * * biasID 2 and 5 are for the memory gate operations. @@ -2369,8 +2376,8 @@ MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerParam(miopenHandle_t handle, * The input argument biasDesc is a previously populated tensor descriptor typically * by first calling miopenGetRNNLayeBias(). * - * Note: When inputSkip mode is selected there is no input layer matrix operation, - * and therefore no associated memory. In this case miopenSetRNNLayerBias will return + * Note: When inputSkip mode is selected there is no input layer matrix operation, + * and therefore no associated memory. In this case miopenSetRNNLayerBias will return * a error status miopenStatusBadParm for input biasID associated with the input GEMM. 
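As an illustration only, the offset API added in this release can be used to address a bias directly inside the packed weight buffer instead of copying through miopenSetRNNLayerBias(). The sketch assumes the parameter order documented for miopenGetRNNLayerBiasOffset(), an fp32 weight buffer, the HIP backend, and that the returned offset is an element offset; that last point should be verified against the header.

```
#include <miopen/miopen.h>
#include <hip/hip_runtime.h>

// Update the hidden-state bias (biasID == 1) of layer 0 in place through the
// offset API. rnnDesc, xDesc and the packed fp32 weight buffer w are assumed
// to exist; hostBias holds the new values.
void UpdateHiddenBiasViaOffset(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc,
                               miopenTensorDescriptor_t xDesc, void* w,
                               const float* hostBias)
{
    const int layer = 0, biasID = 1;

    miopenTensorDescriptor_t biasDesc;
    miopenCreateTensorDescriptor(&biasDesc);

    size_t biasOffset = 0;
    miopenGetRNNLayerBiasOffset(rnnDesc, layer, xDesc, biasID, biasDesc, &biasOffset);

    size_t biasBytes = 0;
    miopenGetRNNLayerBiasSize(handle, rnnDesc, layer, biasID, &biasBytes);

    // Assumption: the offset is counted in elements of the fp32 weight buffer;
    // check miopen.h if a byte offset is expected instead.
    hipMemcpy(static_cast<float*>(w) + biasOffset, hostBias, biasBytes,
              hipMemcpyHostToDevice);

    miopenDestroyTensorDescriptor(biasDesc);
}
```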
* * @param handle MIOpen handle (input) @@ -2412,14 +2419,14 @@ MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerBias(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hx Pointer to the hidden layer input tensor. If hx is NULL, + * @param hx Pointer to the hidden layer input tensor. If hx is NULL, * then the initial hidden state will be zero initialized. (input) * @param cxDesc A cell tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param cx Pointer to the cell layer input tensor. If cx is NULL, + * @param cx Pointer to the cell layer input tensor. If cx is NULL, * then the initial cell state will be zero initialized. (input) * @param wDesc A weights tensor descriptor (input) * @param w Pointer to input weights tensor (input) @@ -2436,14 +2443,14 @@ MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerBias(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hy Pointer to the hidden layer output tensor. If hy is NULL, + * @param hy Pointer to the hidden layer output tensor. If hy is NULL, * then the final hidden state will not be saved. (output) * @param cyDesc A cell tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param cy Pointer to the cell layer output tensor. If hy is NULL, + * @param cy Pointer to the cell layer output tensor. If hy is NULL, * then the final cell state will not be saved. (output) * @param workSpace Pointer to memory allocated for forward training (input) * @param workSpaceNumBytes Number of allocated bytes in memory for the workspace (input) @@ -2501,7 +2508,7 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNForwardTraining(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param dcy Pointer to the cell layer input tensor. If dcy is NULL, + * @param dcy Pointer to the cell layer input tensor. If dcy is NULL, * then the initial delta cell state will be zero initialized. (input) * @param wDesc A weights tensor descriptor (input) * @param w Pointer to input weights tensor (input) @@ -2510,14 +2517,14 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNForwardTraining(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. 
(input) - * @param hx Pointer to the hidden layer input tensor. If hx is NULL, + * @param hx Pointer to the hidden layer input tensor. If hx is NULL, * then the initial hidden state will be zero initialized. (input) * @param cxDesc A input cell tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param cx Pointer to the hidden layer input tensor. If cx is NULL, + * @param cx Pointer to the hidden layer input tensor. If cx is NULL, * then the initial cell state will be zero initialized. (input) * @param dxDesc An array of tensor descriptors. These are the * input descriptors to each time step. The first dimension of each descriptor is the @@ -2591,7 +2598,7 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardData(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hx Pointer to the hidden layer input tensor. If hx is NULL, + * @param hx Pointer to the hidden layer input tensor. If hx is NULL, * then the initial hidden state will be zero initialized. (input) * @param yDesc An array of fully packed tensor descriptors associated * with the output from each time step. The first dimension of the tensor descriptors @@ -2643,14 +2650,14 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardWeights(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hx Pointer to the hidden layer input tensor. If hx is NULL, + * @param hx Pointer to the hidden layer input tensor. If hx is NULL, * then the initial hidden state will be zero initialized. (input) * @param cxDesc A cell tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param cx Pointer to the cell layer input tensor. If cx is NULL, + * @param cx Pointer to the cell layer input tensor. If cx is NULL, * then the initial cell state will be zero initialized. (input) * @param wDesc A weights tensor descriptor (input) * @param w Pointer to input weights tensor (input) @@ -2667,14 +2674,14 @@ MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardWeights(miopenHandle_t handle, * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param hy Pointer to the hidden layer output tensor. If hy is NULL, + * @param hy Pointer to the hidden layer output tensor. If hy is NULL, * then the final hidden state will not be saved. 
(output) * @param cyDesc A output cell tensor descriptor that has as its first dimension * of the number of layers if the direction mode is unidirectional and twice the * number of layers if the direction mode is bidirectional. The second dimension of * the descriptor must equal the largest first dimension of the xDesc tensor descriptor * array. The third dimension equals the hiddenSize. (input) - * @param cy Pointer to the cell layer output tensor. If cy is NULL, + * @param cy Pointer to the cell layer output tensor. If cy is NULL, * then the final cell state will not be saved. (output) * @param workSpace Pointer to memory allocated for forward training (input) * @param workSpaceNumBytes Number of allocated bytes in memory for the workspace (input) diff --git a/test/rnn_vanilla.cpp b/test/rnn_vanilla.cpp index 9838652d87..db9348cc8c 100644 --- a/test/rnn_vanilla.cpp +++ b/test/rnn_vanilla.cpp @@ -45,7 +45,7 @@ #include #include -#define MIO_RNN_TEST_DEBUG 0 +#define MIO_RNN_TEST_DEBUG 1 #define MIO_RNN_TIME_EVERYTHING 0 /********************************************** From 044918c2262af0fd13fa92eb41e1614a4c593538 Mon Sep 17 00:00:00 2001 From: Daniel Lowell Date: Sun, 25 Mar 2018 20:40:30 -0500 Subject: [PATCH 04/12] Removed debug flag. --- test/rnn_vanilla.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/rnn_vanilla.cpp b/test/rnn_vanilla.cpp index db9348cc8c..9838652d87 100644 --- a/test/rnn_vanilla.cpp +++ b/test/rnn_vanilla.cpp @@ -45,7 +45,7 @@ #include #include -#define MIO_RNN_TEST_DEBUG 1 +#define MIO_RNN_TEST_DEBUG 0 #define MIO_RNN_TIME_EVERYTHING 0 /********************************************** From eec8ea11ec64cd34b70f3ed86d1a8dcf70848a39 Mon Sep 17 00:00:00 2001 From: Daniel Lowell Date: Sun, 25 Mar 2018 20:43:42 -0500 Subject: [PATCH 05/12] More formatting. --- src/ocl/rnnocl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ocl/rnnocl.cpp b/src/ocl/rnnocl.cpp index 6a42a2fcec..c1edb6f444 100644 --- a/src/ocl/rnnocl.cpp +++ b/src/ocl/rnnocl.cpp @@ -3682,7 +3682,7 @@ void RNNDescriptor::RNNBackwardWeights(Handle& handle, { int in_bias_val = inputMode == miopenRNNskip ? 0 : wei_stride; - hid_shift = li * batch_n * hy_stride; + hid_shift = li * batch_n * hy_stride; wei_shift = (li == 0) ? (wei_shift_bias + in_bias_val) : (wei_shift_bias + in_bias_val + li * 2 * wei_stride); From c0c32e82bddfbe4ea04a7f4eeed7629438db8823 Mon Sep 17 00:00:00 2001 From: mayank daga Date: Fri, 30 Mar 2018 13:19:12 -0500 Subject: [PATCH 06/12] multi-line formatting --- include/miopen/miopen.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 1954adf61c..01aff0be79 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -700,12 +700,10 @@ miopenConvolutionForwardGetWorkSpaceSize(miopenHandle_t handle, * run to determine the required memory for this search. * * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If - * a - * configuration match is not found, a default configuration will be returned. + * a configuration match is not found, a default configuration will be returned. * * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. - * If - * a match is not found, an exhaustive search is performed by running individual algorithms. + * If a match is not found, an exhaustive search is performed by running individual algorithms. 
* * @param handle MIOpen handle (input) * @param xDesc Tensor descriptor for data input tensor x (input) @@ -835,12 +833,10 @@ miopenConvolutionBackwardDataGetWorkSpaceSize(miopenHandle_t handle, * the required memory for this search. * * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If - * a - * configuration match is not found, a default configuration will be returned. + * a configuration match is not found, a default configuration will be returned. * * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. - * If - * a match is not found, an exhaustive search is performed by running individual algorithms. + * If a match is not found, an exhaustive search is performed by running individual algorithms. * * @param handle MIOpen handle (input) * @param dyDesc Tensor descriptor for data input tensor dy (input) @@ -950,12 +946,10 @@ miopenConvolutionBackwardWeightsGetWorkSpaceSize(miopenHandle_t handle, * determine the required memory for this search. * * * If exhaustiveSearch == 0, MIOpen will look for the first kernel with a configuration match. If - * a - * configuration match is not found, a default configuration will be returned. + * a configuration match is not found, a default configuration will be returned. * * * If exhaustiveSearch == 1, MIOpen will look for the best kernel for the provided configuration. - * If - * a match is not found, an exhaustive search is performed by running individual algorithms. + * If a match is not found, an exhaustive search is performed by running individual algorithms. * * @param handle MIOpen handle (input) * @param dyDesc Tensor descriptor for data input tensor dy (input) From 4b44009f10b206515e5e51fbe9346f11f44814ec Mon Sep 17 00:00:00 2001 From: mayank daga Date: Fri, 30 Mar 2018 13:21:33 -0500 Subject: [PATCH 07/12] fixed numeric --- doc/src/perfdatabase.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/perfdatabase.md b/doc/src/perfdatabase.md index 11b0ce694c..20c34a7339 100644 --- a/doc/src/perfdatabase.md +++ b/doc/src/perfdatabase.md @@ -48,4 +48,4 @@ This variable allows to limit the scope of `MIOPEN_FIND_ENFORCE`, so that only f **CONV_BWD (3)** `MIOPEN_FIND_ENFORCE` affects only Backward Data convolutions. -**CONV_WRW (3)** `MIOPEN_FIND_ENFORCE` affects only Backward With Regard to Weights (a.k.a WRW) convolutions. \ No newline at end of file +**CONV_WRW (4)** `MIOPEN_FIND_ENFORCE` affects only Backward With Regard to Weights (a.k.a WRW) convolutions. From 768ba03b6e1f270b5b58f515496bd990eedf7b49 Mon Sep 17 00:00:00 2001 From: mayank daga Date: Fri, 30 Mar 2018 13:28:44 -0500 Subject: [PATCH 08/12] edits --- README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b6ff4241d5..d0b58d5a14 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ This will install by default to `/usr/local` but it can be installed in another cmake -P install_deps.cmake --prefix /some/local/dir ``` -If Ubuntu v16 is used then the `OpenSSL` and `Boost` packages can also be installed by: +Instructions to manually install all the dependencies on Ubuntu v16 are present in this [section](#installing-the-dependencies-manually). 
``` sudo apt-get install libssl-dev sudo apt-get install libboost-dev @@ -187,6 +187,18 @@ Also, githooks can be installed to format the code per-commit: ./.githooks/install ``` +## Installing the dependencies manually + +If Ubuntu v16 is used then the `OpenSSL` and `Boost` packages can also be installed by: +``` +sudo apt-get install libssl-dev +sudo apt-get install libboost-dev +sudo apt-get install libboost-system-dev +sudo apt-get install libboost-filesystem-dev +``` + +`half` header needs to be installed from [here](http://half.sourceforge.net/). + ## Using docker The easiest way is to use docker. You can build the top-level docker file: From 5d59939c85caf74ed645b9f4b7f1241979df33ba Mon Sep 17 00:00:00 2001 From: Mayank Daga Date: Fri, 30 Mar 2018 13:35:07 -0500 Subject: [PATCH 09/12] Update README.md --- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index d0b58d5a14..d124bc83ae 100644 --- a/README.md +++ b/README.md @@ -33,12 +33,6 @@ cmake -P install_deps.cmake --prefix /some/local/dir ``` Instructions to manually install all the dependencies on Ubuntu v16 are present in this [section](#installing-the-dependencies-manually). -``` -sudo apt-get install libssl-dev -sudo apt-get install libboost-dev -sudo apt-get install libboost-system-dev -sudo apt-get install libboost-filesystem-dev -``` ## Installing MIOpen with pre-built packages From 98c159596b93cbbfde7b9e614ab0176cc20e491b Mon Sep 17 00:00:00 2001 From: mayank daga Date: Fri, 30 Mar 2018 13:39:08 -0500 Subject: [PATCH 10/12] fixed var. redefine --- src/kernels/MIOpenLRNBwd.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels/MIOpenLRNBwd.cl b/src/kernels/MIOpenLRNBwd.cl index b5c235b25e..9bdb6f7529 100644 --- a/src/kernels/MIOpenLRNBwd.cl +++ b/src/kernels/MIOpenLRNBwd.cl @@ -40,7 +40,7 @@ #endif #define _FLOAT2 PPCAT(_FLOAT, TWO) -#define _FLOAT2 PPCAT(_FLOAT, THREE) +#define _FLOAT3 PPCAT(_FLOAT, THREE) #define _FLOAT4 PPCAT(_FLOAT, FOUR) #define _FLOAT8 PPCAT(_FLOAT, EIGHT) From 2bbec29955877584fd4ccfcc9e56f66a45686df4 Mon Sep 17 00:00:00 2001 From: Mayank Daga Date: Fri, 30 Mar 2018 13:47:45 -0500 Subject: [PATCH 11/12] Update releasenotes.md --- doc/src/releasenotes.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/releasenotes.md b/doc/src/releasenotes.md index 9c0d6bf705..e8a93e8240 100644 --- a/doc/src/releasenotes.md +++ b/doc/src/releasenotes.md @@ -5,17 +5,17 @@ Notes: -- This release adds fp16 support for Inference using CNNs - Performance improvements for RNNs - Performance improvements for convolutions using 1x1 filters - Performance improvement for Batch Normalization +- This release adds preliminary fp16 support for Inference using CNNs - Bug fixes for various components of MIOpen -Changed: +Changes: - Added 2 new API for RNNs: miopenGetRNNLayerParamOffset and miopenGetRNNLayerBiasOffset -- Added in support for uninitialized hidden states and nullptr outputs in RNNs -- Added support for Set and Scale operations for strided tensors with dimensions 1,2,3,4,5 +- Added support for uninitialized hidden states and nullptr outputs in RNNs +- Added support for Set and Scale operations for strided tensors with dimensions 1 to 5 - Added multi-thread and multi-process support for the performance database - Improved performance for OpTensor - Fixed bug in convolutions for backward bias From 61ca3774ed5d4d54ca303a6c0c64a4f320ed5a83 Mon Sep 17 00:00:00 2001 From: Mayank Daga Date: Fri, 30 Mar 2018 13:49:18 -0500 Subject: 
[PATCH 12/12] Update README.md --- README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index d124bc83ae..5c2706f597 100644 --- a/README.md +++ b/README.md @@ -20,14 +20,10 @@ AMD's library for high peformance machine learning primitives. MIOpen supports t ## Installing the dependencies -The dependencies can be installed with the `install_deps.cmake`, script: +The dependencies can be installed with the `install_deps.cmake` script: `cmake -P install_deps.cmake` -``` -cmake -P install_deps.cmake -``` This will install by default to `/usr/local` but it can be installed in another location with `--prefix` argument: - ``` cmake -P install_deps.cmake --prefix /some/local/dir ```
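
A brief usage sketch to accompany the README hunk above: when the dependencies are installed into a custom prefix with `install_deps.cmake`, the MIOpen build has to be told where that prefix is. The `--prefix` path reuses `/some/local/dir` from the diff; the `MIOPEN_BACKEND` option and the use of `CMAKE_PREFIX_PATH` are assumptions based on standard CMake practice and the rest of the README, not text shown in this patch.

```
# Install the dependencies into a custom prefix (command taken from the README above)
cmake -P install_deps.cmake --prefix /some/local/dir

# Hypothetical follow-up: point the MIOpen configure step at that prefix.
# MIOPEN_BACKEND and CMAKE_PREFIX_PATH are assumptions, not part of this patch.
mkdir build && cd build
cmake -DMIOPEN_BACKEND=OpenCL -DCMAKE_PREFIX_PATH=/some/local/dir ..
make -j"$(nproc)"
```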