Fix some bugs. #10

Open · wants to merge 10 commits into master
**README.md** (4 changes: 2 additions & 2 deletions)

````diff
@@ -26,11 +26,11 @@ Options:
 --model            Whether to use LSTM or GRU units      gru
 --n_epochs         Number of epochs to train             2000
 --print_every      Log learning rate at this interval    100
---hidden_size      Hidden size of GRU                    50
+--hidden_size      Hidden size of GRU                    128
 --n_layers         Number of GRU layers                  2
 --learning_rate    Learning rate                         0.01
 --chunk_len        Length of training chunks             200
---batch_size       Number of examples per batch          100
+--batch_size       Number of examples per batch          128
 --cuda             Use CUDA
 ```
````

**generate.py** (22 changes: 10 additions & 12 deletions)

```diff
@@ -8,21 +8,18 @@
 from helpers import *
 from model import *
 
-def generate(decoder, prime_str='A', predict_len=100, temperature=0.8, cuda=False):
-    hidden = decoder.init_hidden(1)
-    prime_input = Variable(char_tensor(prime_str).unsqueeze(0))
+def generate(decoder, all_characters, prime_str='A', predict_len=100, temperature=0.8, cuda=False):
+    hidden = decoder.init_hidden(1, cuda)
+    prime_input = Variable(char_tensor(prime_str, all_characters).unsqueeze(0))
 
     if cuda:
-        hidden = hidden.cuda()
         prime_input = prime_input.cuda()
     predicted = prime_str
 
     # Use priming string to "build up" hidden state
-    for p in range(len(prime_str) - 1):
-        _, hidden = decoder(prime_input[:,p], hidden)
-
-    inp = prime_input[:,-1]
+    _, hidden = decoder(prime_input, hidden)
+    inp = prime_input[0,-1].view(1, -1)
 
     for p in range(predict_len):
         output, hidden = decoder(inp, hidden)
@@ -33,7 +30,7 @@ def generate(decoder, prime_str='A', predict_len=100, temperature=0.8, cuda=False):
         # Add predicted character to string and use as next input
         predicted_char = all_characters[top_i]
         predicted += predicted_char
-        inp = Variable(char_tensor(predicted_char).unsqueeze(0))
+        inp = Variable(char_tensor(predicted_char, all_characters).unsqueeze(0))
         if cuda:
             inp = inp.cuda()
@@ -51,7 +48,8 @@
 argparser.add_argument('--cuda', action='store_true')
 args = argparser.parse_args()
 
-decoder = torch.load(args.filename)
+all_characters, decoder = torch.load(args.filename, map_location='cuda' if args.cuda else 'cpu')
 del args.filename
-print(generate(decoder, **vars(args)))
+with torch.no_grad():
+    print(generate(decoder, all_characters, **vars(args)))
```

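With the model now consuming a whole `(batch, seq)` tensor, the character-by-character priming loop collapses into one forward pass, and the checkpoint carries its own vocabulary. A minimal usage sketch under those assumptions; `shakespeare.pt` is a placeholder filename, not something this PR creates:

```python
# Load a checkpoint written by the updated save(): a (all_characters, decoder) tuple.
import torch

from generate import generate

all_characters, decoder = torch.load('shakespeare.pt', map_location='cpu')
with torch.no_grad():
    print(generate(decoder, all_characters, prime_str='Wh', predict_len=200,
                   temperature=0.8, cuda=False))
```
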
**helpers.py** (12 changes: 4 additions & 8 deletions)

```diff
@@ -1,24 +1,20 @@
 # https://github.com/spro/char-rnn.pytorch
 
-import unidecode
-import string
 import random
 import time
 import math
 import torch
 
 # Reading and un-unicode-encoding data
 
-all_characters = string.printable
-n_characters = len(all_characters)
-
 def read_file(filename):
-    file = unidecode.unidecode(open(filename).read())
-    return file, len(file)
+    file = open(filename).read()
+    all_characters = list(set(file))
+    return file, len(file), all_characters, len(all_characters)
 
 # Turning a string into a tensor
 
-def char_tensor(string):
+def char_tensor(string, all_characters):
     tensor = torch.zeros(len(string)).long()
     for c in range(len(string)):
         try:
```
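One thing reviewers may want to flag: `list(set(file))` derives the vocabulary from the corpus, so the character-to-index mapping changes between runs (set iteration order is not stable across Python processes). That is exactly why train.py below saves `all_characters` alongside the decoder. A small sketch of the new contract; `input.txt` is a placeholder corpus name:

```python
# Sketch of the updated helpers API (assumes an input.txt corpus exists).
from helpers import read_file, char_tensor

file, file_len, all_characters, n_characters = read_file('input.txt')
print('%d characters, %d distinct' % (file_len, n_characters))

# char_tensor now indexes into the vocabulary that read_file returned
t = char_tensor(file[:10], all_characters)
print(t)  # LongTensor of vocabulary indices, shape (10,)
```
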
**model.py** (28 changes: 13 additions & 15 deletions)

```diff
@@ -15,27 +15,25 @@ def __init__(self, input_size, hidden_size, output_size, model="gru", n_layers=1):
 
         self.encoder = nn.Embedding(input_size, hidden_size)
         if self.model == "gru":
-            self.rnn = nn.GRU(hidden_size, hidden_size, n_layers)
+            self.rnn = nn.GRU(hidden_size, hidden_size, n_layers, batch_first=True)
         elif self.model == "lstm":
-            self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers)
+            self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers, batch_first=True)
         self.decoder = nn.Linear(hidden_size, output_size)
 
     def forward(self, input, hidden):
-        batch_size = input.size(0)
+        """
+        input: shape=(batch_size, seq_size)
+        output: shape=(batch_size, seq_size, output_size)
+        """
         encoded = self.encoder(input)
-        output, hidden = self.rnn(encoded.view(1, batch_size, -1), hidden)
-        output = self.decoder(output.view(batch_size, -1))
+        output, hidden = self.rnn(encoded, hidden)
+        output = self.decoder(output)
         return output, hidden
 
-    def forward2(self, input, hidden):
-        encoded = self.encoder(input.view(1, -1))
-        output, hidden = self.rnn(encoded.view(1, 1, -1), hidden)
-        output = self.decoder(output.view(1, -1))
-        return output, hidden
-
-    def init_hidden(self, batch_size):
+    def init_hidden(self, batch_size, cuda):
+        cuda_wrapper = lambda x: x.cuda() if cuda else x
         if self.model == "lstm":
-            return (Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)),
-                    Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)))
-        return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))
+            return (cuda_wrapper(Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))),
+                    cuda_wrapper(Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))))
+        return cuda_wrapper(Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)))
```

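Note that `batch_first=True` only reorders the input and output tensors to `(batch, seq, feature)`; PyTorch keeps the hidden state as `(n_layers, batch, hidden)` regardless, which is why `init_hidden` changes only for device placement. A shape sanity check, as a sketch (the class name `CharRNN` comes from this repo's model.py; the sizes are arbitrary):

```python
# Shape check for the batch_first refactor.
import torch

from model import CharRNN

model = CharRNN(input_size=100, hidden_size=128, output_size=100,
                model="gru", n_layers=2)
inp = torch.randint(0, 100, (64, 200))      # (batch_size, seq_size) of char indices
hidden = model.init_hidden(64, cuda=False)  # (n_layers, batch_size, hidden_size)
output, hidden = model(inp, hidden)
assert output.shape == (64, 200, 100)       # (batch, seq, output_size)
assert hidden.shape == (2, 64, 128)
```
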
**train.py** (32 changes: 16 additions & 16 deletions)

```diff
@@ -19,29 +19,29 @@
 argparser.add_argument('--model', type=str, default="gru")
 argparser.add_argument('--n_epochs', type=int, default=2000)
 argparser.add_argument('--print_every', type=int, default=100)
-argparser.add_argument('--hidden_size', type=int, default=100)
+argparser.add_argument('--hidden_size', type=int, default=128)
 argparser.add_argument('--n_layers', type=int, default=2)
 argparser.add_argument('--learning_rate', type=float, default=0.01)
 argparser.add_argument('--chunk_len', type=int, default=200)
-argparser.add_argument('--batch_size', type=int, default=100)
+argparser.add_argument('--batch_size', type=int, default=128)
 argparser.add_argument('--shuffle', action='store_true')
 argparser.add_argument('--cuda', action='store_true')
 args = argparser.parse_args()
 
 if args.cuda:
     print("Using CUDA")
 
-file, file_len = read_file(args.filename)
+file, file_len, all_characters, n_characters = read_file(args.filename)
 
 def random_training_set(chunk_len, batch_size):
     inp = torch.LongTensor(batch_size, chunk_len)
     target = torch.LongTensor(batch_size, chunk_len)
     for bi in range(batch_size):
-        start_index = random.randint(0, file_len - chunk_len)
+        start_index = random.randint(0, file_len - chunk_len - 1)
         end_index = start_index + chunk_len + 1
         chunk = file[start_index:end_index]
-        inp[bi] = char_tensor(chunk[:-1])
-        target[bi] = char_tensor(chunk[1:])
+        inp[bi] = char_tensor(chunk[:-1], all_characters)
+        target[bi] = char_tensor(chunk[1:], all_characters)
     inp = Variable(inp)
     target = Variable(target)
     if args.cuda:
```
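The extra `- 1` fixes a genuine off-by-one: each sample needs `chunk_len + 1` characters (inputs plus one-shifted targets) and `random.randint` is inclusive at both ends. Illustrative numbers, not from the PR:

```python
# Under the OLD bound, the largest legal start index produces a short chunk.
file_len, chunk_len = 1000, 200
start_index = file_len - chunk_len           # 800: allowed before this fix
end_index = start_index + chunk_len + 1      # 1001, one past the end of the file
chunk = 'x' * file_len
print(len(chunk[start_index:end_index]))     # 200, so chunk[:-1] has 199 chars and
                                             # no longer fits inp[bi] of length 200
```
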
```diff
@@ -50,24 +50,24 @@ def random_training_set(chunk_len, batch_size):
     return inp, target
 
 def train(inp, target):
-    hidden = decoder.init_hidden(args.batch_size)
-    if args.cuda:
-        hidden = hidden.cuda()
+    """
+    inp: (batch_size, seq_size)
+    target: (batch_size, seq_size)
+    """
+    hidden = decoder.init_hidden(args.batch_size, args.cuda)
     decoder.zero_grad()
     loss = 0
 
-    for c in range(args.chunk_len):
-        output, hidden = decoder(inp[:,c], hidden)
-        loss += criterion(output.view(args.batch_size, -1), target[:,c])
+    output, hidden = decoder(inp, hidden)
+    loss = criterion(output.view(-1, output.size(-1)), target.view(-1))
 
     loss.backward()
     decoder_optimizer.step()
 
-    return loss.data[0] / args.chunk_len
+    return loss.item()
 
 def save():
     save_filename = os.path.splitext(os.path.basename(args.filename))[0] + '.pt'
-    torch.save(decoder, save_filename)
+    torch.save((all_characters, decoder), save_filename)
     print('Saved as %s' % save_filename)
 
 # Initialize models and start training
```
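The training step now pushes the whole chunk through the RNN in one call, and the loss becomes a single cross-entropy over every `(batch, position)` pair; since `nn.CrossEntropyLoss` averages by default, `loss.item()` is already a per-character mean and the old `/ args.chunk_len` goes away. A standalone sketch of the shapes (assumes `criterion = nn.CrossEntropyLoss()`, which is what this repo uses; the sizes are illustration values):

```python
# Shape walk-through of the single-pass loss.
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
batch_size, seq_size, n_chars = 128, 200, 100
output = torch.randn(batch_size, seq_size, n_chars)         # decoder logits
target = torch.randint(0, n_chars, (batch_size, seq_size))  # next-char indices

# (batch, seq, vocab) -> (batch*seq, vocab); (batch, seq) -> (batch*seq,)
loss = criterion(output.view(-1, output.size(-1)), target.view(-1))
print(loss.item())  # mean cross-entropy per character position
```
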
```diff
@@ -97,7 +97,7 @@ def save():
 
         if epoch % args.print_every == 0:
             print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / args.n_epochs * 100, loss))
-            print(generate(decoder, 'Wh', 100, cuda=args.cuda), '\n')
+            print(generate(decoder, all_characters, 'Wh', 100, cuda=args.cuda), '\n')
 
     print("Saving...")
     save()
```
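Taken together, a quick smoke test of how the patched pieces fit; a sketch with a tiny in-memory corpus and arbitrary sizes, not part of the PR:

```python
# Smoke test tying helpers.py, model.py and the new training shapes together.
import torch
import torch.nn as nn

from helpers import char_tensor
from model import CharRNN

corpus = 'hello world, hello rnn. '
all_characters = list(set(corpus))        # per-corpus vocabulary, as in read_file
n_characters = len(all_characters)

decoder = CharRNN(n_characters, 32, n_characters, model='gru', n_layers=2)
inp = char_tensor(corpus[:-1], all_characters).unsqueeze(0)    # (1, seq)
target = char_tensor(corpus[1:], all_characters).unsqueeze(0)  # (1, seq)

hidden = decoder.init_hidden(1, cuda=False)
output, hidden = decoder(inp, hidden)                          # (1, seq, n_characters)
loss = nn.CrossEntropyLoss()(output.view(-1, n_characters), target.view(-1))
print(loss.item())
```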