-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarkov.py
56 lines (40 loc) · 1.38 KB
/
markov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""
Neat resources:
http://setosa.io/blog/2014/07/26/markov-chains/
http://www.dartmouth.edu/~chance/teaching_aids/books_articles/probability_book/Chapter11.pdf
https://github.com/substack/node-markov/blob/master/index.js
https://golang.org/doc/codewalk/markov/
"""
from normalizer import Normalizer
from random import Random
class Markov(object):
    """Word-level Markov chain built from whitespace-separated tokens.

    The chain maps a prefix (``prefix_len`` consecutive tokens joined by a
    single space) to the list of tokens seen immediately after that prefix.
    A suffix is appended once per occurrence, so a uniform pick from the
    list is weighted by how often each suffix followed the prefix.
    """

    def __init__(self, prefix_len=3, corpus=None, seed=1):
        """Create a chain and optionally ingest an initial corpus.

        Args:
            prefix_len: Number of tokens that make up one prefix key.
            corpus: Optional text to ingest immediately; skipped when falsy.
            seed: Seed for the private random generator, so that selections
                are reproducible for a given seed and call sequence.
        """
        self.prefix_len = prefix_len
        self.normalizer = Normalizer()
        self.random = Random(seed)
        self.chain = {}
        if corpus:
            self.ingest(corpus)

    def ingest(self, corpus):
        """Add every prefix -> suffix transition found in ``corpus``.

        The text is passed through the normalizer's
        ``normalize_symbol_boundaries`` and split on whitespace. A corpus
        with too few tokens to form a prefix plus a suffix adds nothing.

        Args:
            corpus: Text to tokenize and add to the chain.
        """
        # TODO(jj): improve tokenizer ("tokenizer")
        tokens = self.normalizer.normalize_symbol_boundaries(corpus).split()
        token_count = len(tokens)
        for start in range(token_count):
            suffix_index = start + self.prefix_len
            if suffix_index >= token_count:
                # Later windows only run further past the end, so stop here.
                break
            prefix = ' '.join(tokens[start:suffix_index])
            self.chain.setdefault(prefix, []).append(tokens[suffix_index])

    def next(self, key):
        """Return a randomly chosen suffix recorded for ``key``.

        Args:
            key: A prefix string in the same form as the chain's keys.

        Returns:
            One of the recorded suffixes, or None when the prefix is unknown
            or has no recorded suffixes.
        """
        choices = self.chain.get(key)
        if not choices:
            return None
        return choices[self.random.randint(0, len(choices) - 1)]

    def get_key(self):
        """Return a randomly chosen prefix from the chain.

        Returns:
            One of the chain's keys, or None when the chain is empty.
        """
        keys = list(self.chain)
        if not keys:
            # randint(0, -1) would raise ValueError on an empty chain.
            return None
        return keys[self.random.randint(0, len(keys) - 1)]
if __name__ == "__main__":
    # Running the module as a script only builds a default, empty chain.
    markov = Markov()