-
Notifications
You must be signed in to change notification settings - Fork 1
/
sentence_generator.py
56 lines (41 loc) · 1.44 KB
/
sentence_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from utils.data_parser import load_data
# Loaded once, when this module is imported. As used below: `vocab` maps a
# word index to its word; `unigrams` exposes guess(); `bigrams` is indexed by
# one word index and `trigrams` by a tuple of two word indices, each yielding
# an object that exposes guess().
vocab, unigrams, bigrams, trigrams = load_data()
def gen_sentence():
    """Generate a random sentence by sampling from the n-gram models.

    Sampling begins at the start-marker index and stops as soon as the
    end-marker index is drawn. Each new word is drawn from the trigram model
    keyed on the two preceding word indices; when that draw is 0 (treated as
    "no word found") the bigram model of the last word is tried, and when
    that is 0 as well the unigram model is used.

    Returns:
        list: the sentence as words looked up in ``vocab``, including the
            entries for the start and end markers at either end.
    """
    first_idx = 153  # index used as the sentence-start marker
    last_idx = 152  # index used as the sentence-end marker

    # Seed with the start marker plus one word drawn from its bigram model.
    indices = [first_idx, bigrams[first_idx].guess()]

    while indices[-1] != last_idx:
        # The two most recent word indices form the trigram lookup key.
        context = (indices[-2], indices[-1])
        candidate = trigrams[context].guess()
        if candidate == 0:
            # Trigram model had nothing: back off to the bigram model.
            candidate = bigrams[context[1]].guess()
        if candidate == 0:
            # Bigram model had nothing either: back off to the unigram model.
            candidate = unigrams.guess()
        indices.append(candidate)

    # Translate word indices into the actual words.
    return [vocab[idx] for idx in indices]
def print_sentence(sentence):
    """Pretty print a sentence list.

    The first and last entries of ``sentence`` are skipped; ``gen_sentence``
    puts the start and end markers there. The remaining words are joined with
    single spaces, except that no space is inserted before a punctuation
    mark. A sentence with no words between the markers (or a list shorter
    than two entries) prints an empty line instead of printing the end marker
    or raising ``IndexError``.

    Args:
        sentence (list): list of words to print
    """
    punctuation = {'.', ',', ';', ':', '!', '?'}

    # Drop the leading start marker and trailing end marker in one slice so
    # short inputs simply yield no words.
    words = sentence[1:-1]

    pieces = []
    for position, word in enumerate(words):
        # Never put a space before the first word or before punctuation.
        if position and word not in punctuation:
            pieces.append(" ")
        pieces.append(word)

    print("".join(pieces))
if __name__ == "__main__":
    # Script entry point: generate one random sentence and print it.
    sentence = gen_sentence()
    print_sentence(sentence)