# models_are_here.py
import torch
from torch import nn


class Attention_layer(nn.Module):
    """Per-channel attention over the time dimension of a [B, T, D] tensor."""

    def __init__(self, sequence_length):
        super(Attention_layer, self).__init__()
        self.input_size = sequence_length
        self.output_size = sequence_length
        # Scores the T time steps, independently for each feature channel.
        self.dense = nn.Linear(sequence_length, sequence_length)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, input_tensor):  # input_tensor: [B, T, D]
        # Attention weights over time: score [B, D, T], then softmax over T.
        y = self.softmax(self.dense(input_tensor.permute(0, 2, 1)))  # [B, D, T]
        y = y.permute(0, 2, 1)  # [B, T, D]
        # Reweight the input time steps with the attention weights.
        y = input_tensor * y
        return y


class EncoderRNN(nn.Module):
    """Embedding, attention over the embedded sequence, and a bidirectional LSTM."""

    def __init__(self, input_size, embed_size, hidden_size, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.embed_size = embed_size
        self.n_layers = n_layers
        self.dropout = dropout
        self.embedding = nn.Embedding(input_size, embed_size, padding_idx=0)
        self.lstm = nn.LSTM(embed_size, hidden_size, n_layers, dropout=self.dropout,
                            bidirectional=True, batch_first=False)

    def forward(self, input_tensor, attention, hidden=None):
        '''
        :param input_tensor:
            tensor of token indices with shape (batch_size(B), num_step(T))
        :param attention:
            attention module applied to the embedded sequence (e.g. Attention_layer)
        :param hidden:
            initial state of the LSTM (defaults to zeros)
        :returns:
            LSTM outputs in shape (T, B, hidden_size(H)), with the two directions summed,
            and the last hidden state of the LSTM
        '''
        embedded = self.embedding(input_tensor)  # [B, T, D]
        input_to_lstm = attention(embedded)  # [B, T, D]
        # The LSTM expects time-major input: [T, B, D]
        outputs, hidden = self.lstm(input_to_lstm.permute(1, 0, 2), hidden)
        # Sum the forward and backward directions
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
        return outputs, hidden  # [T, B, H]


class DecoderRNN(nn.Module):
    """Attention over the encoder outputs, a unidirectional LSTM, and a projection to the vocabulary."""

    def __init__(self, input_size, hidden_size, vocabulary, n_layers=1, dropout=0):
        super(DecoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout = dropout
        # The attention scorer assumes a fixed sequence length of 20 time steps.
        self.dense = nn.Linear(20, 20)
        self.softmax = nn.Softmax(dim=-1)
        self.lstm = nn.LSTM(input_size, hidden_size, n_layers, dropout=self.dropout,
                            bidirectional=False, batch_first=False)
        self.dense_output = nn.Linear(hidden_size, vocabulary)

    def forward(self, input_tensor, sequence_length=20, hidden=None):
        '''
        :param input_tensor:
            encoder outputs of shape (num_step(T), batch_size(B), hidden_size(H)), with T fixed to 20
        :param sequence_length:
            expected sequence length (unused; the dense layer is hard-coded to 20)
        :param hidden:
            initial state of the LSTM (defaults to zeros)
        :returns:
            vocabulary logits in shape (B, T, vocabulary)
        '''
        # Attention weights over the time dimension of the encoder outputs
        y = input_tensor.permute(1, 2, 0)  # [B, H, T]
        y = self.softmax(self.dense(y))  # [B, H, T]
        y = y.permute(2, 0, 1)  # [T, B, H]
        y = y * input_tensor
        outputs, _ = self.lstm(y, hidden)  # [T, B, H]
        y = self.dense_output(outputs)  # [T, B, vocabulary]
        return y.permute(1, 0, 2)  # [B, T, vocabulary]
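

# ---------------------------------------------------------------------------
# Minimal usage sketch (an illustrative addition, not part of the original
# training code). The hyperparameter values below (vocabulary size, embedding
# and hidden sizes, batch size) are assumptions chosen only to exercise the
# tensor shapes; only the sequence length of 20 is dictated by DecoderRNN's
# hard-coded dense layer.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    vocab_size, embed_size, hidden_size = 100, 32, 64  # assumed sizes
    batch_size, seq_len = 4, 20

    attention = Attention_layer(sequence_length=seq_len)
    encoder = EncoderRNN(vocab_size, embed_size, hidden_size)
    decoder = DecoderRNN(hidden_size, hidden_size, vocab_size)

    # Random token indices in [1, vocab_size); index 0 is the padding index.
    tokens = torch.randint(1, vocab_size, (batch_size, seq_len))

    encoder_outputs, encoder_hidden = encoder(tokens, attention)  # [T, B, H]
    logits = decoder(encoder_outputs)                             # [B, T, vocab]

    print(encoder_outputs.shape)  # torch.Size([20, 4, 64])
    print(logits.shape)           # torch.Size([4, 20, 100])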