From 95b4377f87ccf834e65991963a6ddb330a7be239 Mon Sep 17 00:00:00 2001
From: xuewanqi
Date: Thu, 19 Jul 2018 15:48:09 +0000
Subject: [PATCH] SINGA-386 Implement RNN operation for autograd

- redesign some APIs to adapt to autograd
---
 python/singa/autograd.py   | 125 +++++++++++++++++++++++++++++++++----
 src/model/operation/rnn.cc |  79 ++++++++---------------
 src/model/operation/rnn.h  |   7 +--
 3 files changed, 141 insertions(+), 70 deletions(-)
 mode change 100644 => 100755 src/model/operation/rnn.cc
 mode change 100644 => 100755 src/model/operation/rnn.h

diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 1d649ca9b3..dc58cfa291 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -943,34 +943,135 @@ class _RNN(Operation):
 
     def __init__(self, handle):
         self.handle = handle
 
-    def forward(self, X, W):
+    def forward(self, X, h0, c0, W):
+        # X of shape (seq_len, batch, input_size)
+        # h0_c0: (h0, c0) if lstm, else (h0,)
+        # h0, c0 of shape (num_layers * num_directions, batch, hidden_size)
+        if c0 is None:
+            assert self.handle.rnn_mode != 'lstm'
+            c0 = CTensor([])  # an empty CTensor stands in for the unused cx
+
         if self.handle.device_id == -1:
             raise NotImplementedError
         else:
             if training:
-                out, self.cache = singa.GpuRNNForwardTraining(
-                    self.handle, X, W)
+                Y, hout, cout = singa.GpuRNNForwardTraining(
+                    self.handle, X, h0, c0, W)
+                self.cache = (X, Y, h0, c0, W)
             else:
-                out = singa.GpuRNNForwardInference(self.handle, X, W)
-            return out
+                Y, hout, cout = singa.GpuRNNForwardInference(
+                    self.handle, X, h0, c0, W)
+
+        # Y of shape (seq_len, batch, hidden_size * num_directions)
+        # hout_cout: (hout, cout) if lstm, else (hout,)
+        # hout, cout of shape (num_layers * num_directions, batch,
+        # hidden_size)
+        outputs = _1dTo3d(Y)
+
+        if self.handle.rnn_mode != 'lstm':
+            return outputs, hout
+        else:
+            return outputs, hout, cout
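+    # The backward contract mirrors forward (an assumed convention in this
+    # redesign, not documented elsewhere in the patch): dY carries the
+    # gradient of the stacked outputs, dh the gradient of the final hidden
+    # state, and dc (LSTM only) the gradient of the final cell state; the
+    # empty-CTensor default for dc plays the same role as the empty c0 above.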
 
-    def backward(self, dY):
+    def backward(self, dY, dh, dc=CTensor([])):
         assert training is True and hasattr(
-            self, 'cache'), 'Please set training as True before do BP.'
+            self, 'cache'), 'Please set training to True before doing BP.'
 
-        if dY.device().id() != self.handle.device_id:
-            dY.ToDevice(self.inputs[0].device())
+        dY_1d = _3dTo1d(dY)
+
+        if dY_1d.device().id() != self.handle.device_id:
+            dY_1d.ToDevice(self.cache[0].device())
 
         if self.handle.device_id == -1:
             raise NotImplementedError
         else:
-            dX, dW = singa.GpuRNNBackward(self.handle, dY, self.cache)
-            return dX, dW
+            dX_1d, dhout, dcout, dW = singa.GpuRNNBackward(
+                self.handle, dY_1d, dh, dc, self.cache)
+            dX = _1dTo3d(dX_1d)
 
+            if self.handle.rnn_mode != 'lstm':
+                return dX, dhout, dW
+            else:
+                return dX, dhout, dcout, dW
 
-def rnn():
-    pass
+
+def rnn(handle, x, h0, c0, W):
+    return _RNN(handle)(x, h0, c0, W)
 
 
 class RNN(Layer):
+
+    def __init__(self, input_size, hidden_size, num_layers=1, bias=True,
+                 batch_first=False, dropout=0, bidirectional=False,
+                 rnn_mode='tanh'):
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.bias = bias
+        self.batch_first = batch_first
+        self.dropout = dropout
+        self.bidirectional = bidirectional
+        self.rnn_mode = rnn_mode
+
+        if bias is not True or batch_first is not False:
+            raise NotImplementedError
+
+        # mult is the number of weight/bias sets per layer: 1 for vanilla
+        # tanh/relu cells, 4 for the LSTM gates, 3 for the GRU gates
+        mult = 1
+        if self.rnn_mode == 'tanh' or self.rnn_mode == 'relu':
+            mult *= 1
+        elif self.rnn_mode == 'lstm':
+            mult *= 4
+        elif self.rnn_mode == 'gru':
+            mult *= 3
+        else:
+            raise ValueError('Unknown rnn_mode: ' + self.rnn_mode)
+
+        if self.bidirectional:
+            mult *= 2
+
+        W_Size = 0
+        for k in range(num_layers):
+            if k == 0:
+                w_size = self.hidden_size * \
+                    (self.input_size + self.hidden_size + 2)
+            else:
+                w_size = self.hidden_size * \
+                    (self.hidden_size + self.hidden_size + 2)
+            W_Size += mult * w_size
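+        # Worked example (illustrative numbers only): a single-layer,
+        # unidirectional LSTM with input_size=10 and hidden_size=20 gives
+        # mult = 4 and w_size = 20 * (10 + 20 + 2) = 640, so
+        # W_Size = 4 * 640 = 2560.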
+
+        self.W_Size = W_Size
+        self.W = Tensor(shape=(W_Size,), requires_grad=True,
+                        stores_grad=True)
+        self.W.uniform(0.0, 1.0)
+
+    def __call__(self, inputs, h0, c0=None):
+        # inputs of shape (seq_len, batch, input_size)
+        # h0_c0: (h0, c0) if lstm, else (h0,)
+        # h0, c0 of shape (num_layers * num_directions, batch, hidden_size)
+
+        self.device_check(inputs, h0, self.W)
+
+        if self.rnn_mode == 'lstm':
+            assert c0 is not None, 'Please input c0.'
+            self.device_check(h0, c0)
+
+        self.handle = singa.CudnnRNNHandle(inputs.data, *SOME_PARAMETERS*)
+        self.handle.device_id = inputs.device.id()
+        # attach the mode to the handle so that _RNN can branch on it,
+        # the same way device_id is attached above
+        self.handle.rnn_mode = self.rnn_mode
+
+        X = _3dTo1d(inputs)
+        outputs = rnn(self.handle, X, h0, c0, self.W)
+        return outputs
+
+
+def _3dTo1d(inputs):
+    pass
+
+
+def _1dTo3d(*args):
+    pass
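+
+# NOTE: the two helpers above are placeholders. The intended behaviour (an
+# assumption, not implemented in this patch) is that _3dTo1d flattens a
+# (seq_len, batch, feature) Tensor into the contiguous 1-D layout the cuDNN
+# calls expect, and _1dTo3d restores the 3-D view on the way back.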
+
+
+class LSTM(RNN):
+
+    def __init__(self, input_size, hidden_size, num_layers=1, bias=True,
+                 batch_first=False, dropout=0, bidirectional=False):
+        super(LSTM, self).__init__(input_size, hidden_size, num_layers,
+                                   bias, batch_first, dropout,
+                                   bidirectional, rnn_mode='lstm')
+
+
+class GRU(RNN):
+
+    def __init__(self, input_size, hidden_size, num_layers=1, bias=True,
+                 batch_first=False, dropout=0, bidirectional=False):
+        super(GRU, self).__init__(input_size, hidden_size, num_layers,
+                                  bias, batch_first, dropout,
+                                  bidirectional, rnn_mode='gru')
diff --git a/src/model/operation/rnn.cc b/src/model/operation/rnn.cc
old mode 100644
new mode 100755
index afeba67686..79c397d196
--- a/src/model/operation/rnn.cc
+++ b/src/model/operation/rnn.cc
@@ -263,24 +263,21 @@ vector<Tensor> SplitOutput(size_t num, size_t dim,
   return outputs;
 };
 
-std::vector<std::vector<Tensor>> GpuRNNForwardTraining(const CudnnRNNHandle &crh, const vector<Tensor> &inputs, const Tensor &W) {
-  DataType dtype = inputs.at(0).data_type();
-  auto dev = inputs.at(0).device();
+std::vector<Tensor> GpuRNNForwardTraining(const CudnnRNNHandle &crh, const Tensor &input, const Tensor &hx, const Tensor &cx, const Tensor &W) {
+  DataType dtype = input.data_type();
+  auto dev = input.device();
 
-  CHECK_GT(inputs.size(), 1u + crh.has_cell_);
-  size_t num_x = inputs.size() - crh.has_cell_ - 1;
-  Tensor input = MergeInputs(num_x, inputs);
   Shape outshape{input.Size() * crh.hidden_size_ / crh.input_size_ * crh.num_directions_};
   Tensor output(outshape, dev, dtype);
   // LOG(INFO) << "output size " << output.Size();
 
-  Tensor hx = inputs.at(num_x);
+  Shape state_shape{crh.num_stacks_ * crh.num_directions_, crh.batch_size_, crh.hidden_size_};
+  CHECK_EQ(hx.shape(), state_shape);
   Tensor hy(state_shape, dev, dtype);
-  Tensor cy, cx;
+  Tensor cy;
   if (crh.has_cell_) {
-    cx = inputs.at(num_x + 1);
     cy.ResetLike(hy);
   }
 
@@ -330,39 +327,23 @@ std::vector<std::vector<Tensor>> GpuRNNForwardTraining(const CudnnRNNHandle &crh
       },
       {inb, wb, hxb, cxb}, {outb, hyb, cyb, wspace, rspace});
 
-  auto outputs =
-      SplitOutput(num_x, crh.hidden_size_ * crh.num_directions_, inputs, output);
-  outputs.push_back(hy);
-  if (crh.has_cell_) outputs.push_back(cy);
-
-  std::vector<Tensor> cache;
-  cache.push_back(input);
-  cache.push_back(output);
-  cache.push_back(hx);
-  cache.push_back(cx);
-  cache.push_back(W);
-
-  return {outputs, cache};
+  // cy stays empty unless crh.has_cell_ (LSTM)
+  return {output, hy, cy};
 };
 
-std::vector<Tensor> GpuRNNForwardInference(const CudnnRNNHandle &crh, const vector<Tensor> &inputs, const Tensor &W) {
-  DataType dtype = inputs.at(0).data_type();
-  auto dev = inputs.at(0).device();
-
-  CHECK_GT(inputs.size(), 1u + crh.has_cell_);
-  size_t num_x = inputs.size() - crh.has_cell_ - 1;
-  Tensor input = MergeInputs(num_x, inputs);
+std::vector<Tensor> GpuRNNForwardInference(const CudnnRNNHandle &crh, const Tensor &input, const Tensor &hx, const Tensor &cx, const Tensor &W) {
+  DataType dtype = input.data_type();
+  auto dev = input.device();
 
   Shape outshape{input.Size() * crh.hidden_size_ / crh.input_size_ * crh.num_directions_};
   Tensor output(outshape, dev, dtype);
   // LOG(INFO) << "output size " << output.Size();
 
-  Tensor hx = inputs.at(num_x);
+  Shape state_shape{crh.num_stacks_ * crh.num_directions_, crh.batch_size_, crh.hidden_size_};
+  CHECK_EQ(hx.shape(), state_shape);
   Tensor hy(state_shape, dev, dtype);
 
-  Tensor cy, cx;
+  Tensor cy;
   if (crh.has_cell_) {
-    cx = inputs.at(num_x + 1);
     cy.ResetLike(hy);
   }
 
@@ -405,15 +386,10 @@ std::vector<Tensor> GpuRNNForwardInference(const CudnnRNNHandle &crh, const vect
       // clang-format on
       },
       {inb, wb, hxb, cxb}, {outb, hyb, cyb, wspace});
 
-  auto outputs =
-      SplitOutput(num_x, crh.hidden_size_ * crh.num_directions_, inputs, output);
-  outputs.push_back(hy);
-  if (crh.has_cell_) outputs.push_back(cy);
-
-  return outputs;
+  return {output, hy, cy};
 };
 
-std::pair<vector<Tensor>, Tensor> GpuRNNBackward(const CudnnRNNHandle &crh, const vector<Tensor> &grads, const vector<Tensor> &cache) {
+std::vector<Tensor> GpuRNNBackward(const CudnnRNNHandle &crh, const Tensor &dY, const Tensor &dh, const Tensor &dc, const vector<Tensor> &cache) {
   const Tensor x = cache[0];
   const Tensor y = cache[1];
   const Tensor hx = cache[2];
@@ -423,24 +399,24 @@ std::pair<vector<Tensor>, Tensor> GpuRNNBackward(const CudnnRNNHandle &crh, cons
   auto dev = y.device();
   auto dtype = y.data_type();
 
-  CHECK_GT(grads.size(), 1u + crh.has_cell_);
-  size_t num_dy = grads.size() - crh.has_cell_ - 1;
-  CHECK_EQ(num_dy, crh.seq_length_);
-  const Tensor dy = MergeInputs(num_dy, grads);
-  CHECK_EQ(dy.Size(), y.Size());
-  const Tensor dhy = grads.at(num_dy);
-  Tensor dcy;
-  if (crh.has_cell_)
-    dcy = grads.at(num_dy + 1);
+  CHECK_EQ(dY.Size(), y.Size());
 
   Shape xshape{y.Size() * crh.input_size_ / crh.hidden_size_ / crh.num_directions_};
+  CHECK_EQ(x.shape(), xshape);
   Tensor dx(xshape, dev, dtype);
+
   Tensor dw(W.shape(), dev, dtype);
+
   Shape state_shape{crh.num_stacks_ * crh.num_directions_, crh.batch_size_, crh.hidden_size_};
+  CHECK_EQ(hx.shape(), state_shape);
   Tensor dhx(state_shape, dev, dtype);
+
   Tensor dcx;
   if (crh.has_cell_)
     dcx.ResetLike(dhx);
+
   dw.SetValue(0.0f);
 
-  Block *yb = y.block(), *dyb = dy.block(), *dhyb = dhy.block(),
-        *dcyb = dcy.block(), *xb = x.block(), *cxb = cx.block(),
+  Block *yb = y.block(), *dyb = dY.block(), *dhyb = dh.block(),
+        *dcyb = dc.block(), *xb = x.block(), *cxb = cx.block(),
@@ -483,12 +459,7 @@ std::pair<vector<Tensor>, Tensor> GpuRNNBackward(const CudnnRNNHandle &crh, cons
       {yb, dyb, dhyb, dcyb, xb, wb, wspace, rspace},
       {dxb, dwb, dhxb, dcxb, wspace, rspace});
 
-  auto data_grads = SplitOutput(num_dy, crh.input_size_, grads, dx);
-  data_grads.push_back(dhx);
-  if (crh.has_cell_)
-    data_grads.push_back(dcx);
-
-  return std::make_pair(data_grads, dw);
+  return {dx, dhx, dcx, dw};
 };
 
 #endif  // USE_CUDNN
diff --git a/src/model/operation/rnn.h b/src/model/operation/rnn.h
old mode 100644
new mode 100755
index 0dbbac9974..7a90ff8995
--- a/src/model/operation/rnn.h
+++ b/src/model/operation/rnn.h
@@ -69,18 +69,17 @@ class CudnnRNNHandle: public RNNHandle {
   Tensor reserve_space_;
   Tensor dropout_state_;
 };
-Tensor MergeInputs(size_t num, const vector<Tensor> &in);
 
 vector<Tensor> SplitOutput(size_t num, size_t dim,
                            const vector<Tensor> &in, const Tensor output);
 
-std::vector<std::vector<Tensor>> GpuRNNForwardTraining(const CudnnRNNHandle &crh, const vector<Tensor> &inputs, const Tensor &W);
+std::vector<Tensor> GpuRNNForwardTraining(const CudnnRNNHandle &crh, const Tensor &input, const Tensor &hx, const Tensor &cx, const Tensor &W);
 
-std::vector<Tensor> GpuRNNForwardInference(const CudnnRNNHandle &crh, const vector<Tensor> &inputs, const Tensor &W);
+std::vector<Tensor> GpuRNNForwardInference(const CudnnRNNHandle &crh, const Tensor &input, const Tensor &hx, const Tensor &cx, const Tensor &W);
 
-std::pair<vector<Tensor>, Tensor> GpuRNNBackward(const CudnnRNNHandle &crh, const vector<Tensor> &grads, const vector<Tensor> &cache);
+std::vector<Tensor> GpuRNNBackward(const CudnnRNNHandle &crh, const Tensor &dY, const Tensor &dh, const Tensor &dc, const vector<Tensor> &cache);
 
 #endif  // USE_CUDNN
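
Reviewer note (not part of the patch): with the redesigned API, the expected
call pattern for the LSTM layer is sketched below. This is an assumption
about intended usage rather than runnable code today, since *SOME_PARAMETERS*
in RNN.__call__ and the _3dTo1d/_1dTo3d helpers are still placeholders; the
shapes follow the comments in autograd.py.

    from singa import autograd, tensor, device

    autograd.training = True        # enable the training path in _RNN.forward
    dev = device.create_cuda_gpu()  # only the cuDNN path is implemented

    seq_len, batch, input_size, hidden_size = 4, 2, 10, 20
    x = tensor.Tensor((seq_len, batch, input_size), dev)
    x.gaussian(0.0, 1.0)
    h0 = tensor.Tensor((1, batch, hidden_size), dev)
    h0.set_value(0.0)
    c0 = tensor.Tensor((1, batch, hidden_size), dev)
    c0.set_value(0.0)

    lstm = autograd.LSTM(input_size, hidden_size)
    y, hn, cn = lstm(x, h0, c0)  # y: (seq_len, batch, hidden_size)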