# songs_lyrics_generator.py
import numpy as np
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential, model_from_json
from tensorflow.keras.utils import to_categorical
from random import randint
# READING THE DATA
file = open('D://training_data/lyrics.txt', 'r')
text = file.read()
file.close()
chars = sorted(list(set(text)))  # all unique characters in the corpus
# create mappings of characters to integers (0 to len(chars) - 1) and the reverse
char2int = dict((c, i) for i, c in enumerate(chars))
int2char = dict((i, c) for i, c in enumerate(chars))
print(len(chars))  # vocabulary size (42 for the corpus this script was written against)
step = 5
sequence_length = 40
input_seq = []    # integer-encoded input windows
input_seq1 = []   # raw text input windows
output_seq = []   # integer-encoded next character for each window
output_seq1 = []  # raw next character for each window
for i in range(0, len(text) - sequence_length, step):
    input_ = text[i:i + sequence_length]
    output_ = text[i + sequence_length]
    input_seq1.append(input_)
    output_seq1.append(output_)
    input_seq.append([char2int[ch] for ch in input_])
    output_seq.append([char2int[ch] for ch in output_])  # output_ is a single character, so this is a one-element list
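# A small sanity check (not in the original script): print the first raw window
# and the character it should predict, to confirm the sliding-window pairing.
print(input_seq1[0], '->', output_seq1[0])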
# ONE-HOT ENCODING
x_data = np.zeros((len(input_seq), sequence_length, len(chars)))
# one hot encode each character of every input window
for i, sentence in enumerate(input_seq1):
    for t, char in enumerate(sentence):
        x_data[i, t, char2int[char]] = 1
# one hot encode the output variable
y_data = to_categorical(output_seq)
print(np.shape(x_data))
print(np.shape(y_data))
#####################################################################################################
# BUILDING AND TRAINING THE MODEL
model = Sequential()
model.add(LSTM(256, input_shape=(sequence_length, len(chars)), return_sequences=True))
model.add(Dropout(0.8))
model.add(LSTM(128))
model.add(Dropout(0.8))
model.add(Dense(len(chars), activation='softmax'))
model.compile(optimizer='RMSprop', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_data, y_data, validation_split=0.2, batch_size=200, epochs=300)
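# OPTIONAL: a sketch of checkpointing the long 300-epoch run with the standard
# Keras ModelCheckpoint callback; disabled here (like the loading block below),
# and the filename "weights_best.h5" is illustrative only.
''' CHECKPOINTING DURING TRAINING
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint("weights_best.h5", monitor='val_loss', save_best_only=True)
model.fit(x_data, y_data, validation_split=0.2, batch_size=200, epochs=300, callbacks=[checkpoint])
'''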
######################################################################################################
#########
# SAVING THE MODEL
# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")
#########
''' LOADING THE SAVED MODEL
# load json and create model
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")
'''
#######################################################################################################
# Making Predictions
#######################################################################################################
def onehot2num(arr):
    # map a one-hot vector back to its integer label by looking for a matching row
    # among the one-hot encoded training targets (effectively the same integer as
    # np.argmax(arr), returned wrapped in a one-element list)
    c = 0
    index = 0
    flag = True
    for i in y_data:
        i = list(i)
        if i == list(arr):
            # print('Found at:')
            # print(c)
            index = c
            flag = False
            break
        c += 1
    if flag:
        print('NOT FOUND')
    return output_seq[index]
x = x_data[1:2]
start_index = randint(1, len(x_data))
# x = x_data[start_index:start_index+1]
x_chars = list(input_seq1[1:2])
# x_chars = list(input_seq1[start_index:start_index+1])
outy = list(x_chars)  # copy the seed so appending generated characters does not alias x_chars
for i in range(1000):
    predictions = model.predict(x)
    prediction = predictions[0]
    alpha = np.zeros((1, len(chars)))
    # print(np.argmax(prediction))
    alpha[0][np.argmax(prediction)] = 1  # greedy choice: one-hot of the most likely character
    beta = onehot2num(alpha[0])
    ch = str(int2char[beta[0]])
    outy.append(ch)
    # slide the window: drop the first character and append the predicted one
    x_chars = [(x_chars[0] + ch)[1:]]
    # re-encode the new window as one-hot input for the next prediction
    x = np.zeros((1, sequence_length, len(chars)))
    for j, sentence in enumerate(x_chars):
        for t, char in enumerate(sentence):
            x[j, t, char2int[char]] = 1
        # print(j, sentence)
    predictions = [[]]
    prediction = []
output_sentence = ''
for word in outy:
    output_sentence += word
#########
print(output_sentence)
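#########
# OPTIONAL: a minimal sketch of temperature sampling, assuming the trained `model`
# and `int2char` defined above. Instead of always taking the argmax, drawing a
# character from the rescaled softmax distribution usually gives less repetitive
# text. The name `sample_char` is illustrative and not part of the original script.
def sample_char(prediction, temperature=0.5):
    # rescale the predicted distribution by the temperature and draw one index from it
    prediction = np.asarray(prediction).astype('float64')
    prediction = np.log(prediction + 1e-8) / temperature
    prediction = np.exp(prediction)
    prediction = prediction / np.sum(prediction)
    return np.random.choice(len(prediction), p=prediction)
# e.g. inside the generation loop above: ch = int2char[sample_char(prediction)]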