-
Notifications
You must be signed in to change notification settings - Fork 1
/
perseus.py
89 lines (71 loc) · 2.03 KB
/
perseus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
'''
Attempt at an approximate PBVI (point-based value iteration) solver (unfinished).
TODO: work on time dependence.
'''
import itertools
import time
import os
import pickle
from random import choice
import numpy as np
from model import CancerPOMDP
from modelConstants import W, M, MNEG, MPOS, SDNEG, SDPOS
from pruneLP import pruneLPPulp, pruneLPCplex, pruneLPCvxopt
class PerseusSolve(CancerPOMDP):
    '''
    Unfinished Perseus-style point-based value iteration solver layered on
    top of CancerPOMDP.

    Attributes:
        B: list of belief states to back up (meant to be filled by
            collectBeliefs, which is still a stub).
        alphas: object unpickled from 'alpha/Simple/alpha80.txt'.
        V: current value function, stored as a list of alpha vectors.
    '''

    def __init__(self, *args, **kwargs):
        '''
        Forward all arguments to CancerPOMDP.__init__ and set up the
        solver state (B, alphas, V).
        '''
        CancerPOMDP.__init__(self, *args, **kwargs)
        # initialize list of belief states
        self.B = []
        # initialize alphas
        self.alphas = None
        # pickle.load reads from a file object and needs it opened in
        # binary mode; pickle.loads only accepts an in-memory bytes string.
        # NOTE(review): unpickling can execute arbitrary code -- only load
        # files produced by this project.
        with open('alpha/Simple/alpha80.txt', 'rb') as f:
            self.alphas = pickle.load(f)
        # initialize V: list of alpha vectors
        self.V = []

    def value(self, belief, alphas=None):
        '''
        return value for a given belief

        belief: belief vector
        alphas: optional collection of alpha vectors to evaluate against;
            defaults to self.V
        returns: the largest dot product between belief and an alpha vector
        raises: ValueError when there are no alpha vectors
        '''
        if alphas is None:
            alphas = self.V
        if len(alphas) == 0:
            raise ValueError('no alpha vectors to evaluate the belief against')
        return max(np.dot(belief, a) for a in alphas)

    def collectBeliefs(self, totalBeliefs):
        '''
        run "trials of exploration" in belief space
        sample action / exploration and add it to our set of beliefs until we
        hit the number of totalBeliefs we want

        TODO: not implemented yet -- currently returns None and leaves
        self.B untouched.
        '''
        return

    def backup(self, val, belief):
        '''
        generate a new alpha vector
        for a given value and belief state

        TODO: not implemented yet -- currently returns None.
        '''
        # get all the alphas for an action and observation
        # NOTE(review): reward is called on the class with no arguments and
        # the result is never used -- confirm the intended signature.
        ab_alphas = [CancerPOMDP.reward()]
        return

    @staticmethod
    def isEqual(b1, b2):
        '''
        compare two belief vectors element by element

        returns: True when both have the same length and every pair of
            corresponding entries is equal
        '''
        # zip stops at the shorter input, so check the lengths explicitly
        return len(b1) == len(b2) and all(x1 == x2 for (x1, x2) in zip(b1, b2))

    def computeValueFunc(self, numIter):
        '''
        run numIter randomized point-based backup stages over self.B

        numIter: number of backup stages to perform
        returns: the list of alpha vectors after the last stage (self.V
            unchanged when numIter is 0); self.V itself is not updated

        NOTE: relies on backup() returning an alpha vector, which is still
        a stub.
        '''
        V = self.V
        # q: set vs. list
        # q: likelihood of repeated alpha vectors / belief states
        for _ in range(numIter):
            # work on a copy so self.B is never aliased by the working set
            B_new = list(self.B)
            V_new = []
            # while B is not empty
            while B_new:
                b = choice(B_new)
                # get alpha vector for belief and value
                alpha = self.backup(V, b)
                if np.dot(alpha, b) >= self.value(b, V):
                    # the new vector does not lower the value at b: keep it
                    # and retain only the beliefs it fails to improve
                    B_new = [bp for bp in B_new
                             if np.dot(alpha, bp) < self.value(bp, V)]
                    alpha_b = alpha
                else:
                    # no improvement: reuse the best old vector for b and
                    # drop b so the loop is guaranteed to make progress
                    B_new = [bp for bp in B_new if not self.isEqual(bp, b)]
                    alpha_b = max(V, key=lambda a: np.dot(a, b))
                V_new.append(alpha_b)
            V = V_new
        return V