-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy path_IS.py
51 lines (49 loc) · 1.98 KB
/
_IS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from _util import *
def cal_Bernstein_CI(Ys, V_min=-100, V_max=0, deltas=(0.05, 0.025)):
    """Return empirical-Bernstein style confidence intervals for the mean of ``Ys``.

    The samples are rescaled to ``[0, 1]`` with ``(Ys - V_min) / (V_max - V_min)``.
    For every ``delta`` in ``deltas`` an interval ``mean -/+ half_width`` is built
    with

        half_width = (7 * b / 3) * log(2 / delta) / (n - 1)
                     + (1 / n) * sqrt(log(2 / delta) / (n - 1)
                                      * sum_{i, j} (Y_i - Y_j) ** 2)

    where ``b = 1`` is the range of the rescaled samples. The interval is then
    mapped back to the original scale.

    Args:
        Ys: 1-D sequence or array of sample values (at least two).
        V_min: Lower bound used for rescaling.
        V_max: Upper bound used for rescaling.
        deltas: Per-tail failure probabilities, one interval per entry. The
            defaults ``(0.05, 0.025)`` correspond to the ``[90%, 95%]`` labels
            this function has always advertised for its return value.

    Returns:
        A list with one ``[lower, upper]`` pair per entry of ``deltas``, in the
        same order.

    Raises:
        ValueError: If fewer than two samples are given (the bound divides by
            ``n - 1``).
    """
    # NOTE(review): the first delta used to be 0.5, which contradicts the
    # "[90%, 95%]" label on the return value; 0.05 per tail gives 90%.
    # The formula appears to follow the empirical Bernstein bound used for
    # high-confidence off-policy evaluation -- confirm against the paper.
    Ys = np.asarray(Ys, dtype=float)  # also accept plain Python lists
    n = len(Ys)
    if n < 2:
        raise ValueError("cal_Bernstein_CI needs at least two samples")

    b = 1  # range of the samples once rescaled to [0, 1]
    span = V_max - V_min
    Ys = (Ys - V_min) / span
    u = np.mean(Ys)

    # sum_{i, j} (Y_i - Y_j)^2 == 2 * n * sum_i (Y_i - mean)^2, which avoids
    # the O(n^2) Python double loop.
    pair_variance = 2 * n * np.sum((Ys - u) ** 2)

    CIs = []
    for delta in deltas:
        log_term = np.log(2 / delta)
        half_width = (
            (7 * b / 3) * log_term / (n - 1)
            + np.sqrt(pair_variance * log_term / (n - 1)) / n
        )
        # Undo the [0, 1] rescaling so the bounds are on the original scale.
        lower = (u - half_width) * span + V_min
        upper = (u + half_width) * span + V_min
        CIs.append([lower, upper])
    return CIs
def _step_IS_traj_value(traj, gamma, pi_behva, pi1):
    """Return the step-wise importance-sampling value of one trajectory."""
    Ss = np.asarray([step[0] for step in traj])
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the replacement.
    As = np.asarray([step[1] for step in traj]).astype(int)
    Rs = np.asarray([step[2] for step in traj])

    # Probability each policy assigns to the action actually taken per step.
    behav_probs = pi_behva.get_A_prob(Ss)
    behav_A_probs = behav_probs[np.arange(len(behav_probs)), As]
    tp_probs = pi1.get_A_prob(Ss)
    tp_A_probs = tp_probs[np.arange(len(tp_probs)), As]

    # Weight at step t is the product of the per-step ratios up to and
    # including t; cumprod does this in one pass.
    weights = np.cumprod(tp_A_probs / behav_A_probs)
    discounts = gamma ** np.arange(len(Rs))
    return np.sum(weights * Rs * discounts)


def cal_step_IS(trajs_train_resp, gamma, pi_behva, pi1):
    """Compute step-wise importance-sampling value estimates per trajectory.

    For every trajectory the estimate is

        V = sum_t gamma**t * (prod_{k <= t} p1_k / pb_k) * r_t

    where ``p1_k`` / ``pb_k`` are the probabilities that ``pi1`` / ``pi_behva``
    assign to the action taken at step ``k``.

    Args:
        trajs_train_resp: Iterable of trajectory collections (one per
            replicate). Each trajectory is a sequence of steps; a step is
            indexed as ``step[0]`` (passed to ``get_A_prob``), ``step[1]``
            (integer action index) and ``step[2]`` (reward).
        gamma: Discount factor.
        pi_behva: Behaviour policy; must provide ``get_A_prob(states)``
            returning an array indexable as ``[step, action]``.
        pi1: Target policy with the same ``get_A_prob`` interface.

    Returns:
        A list with one ``[values, V_min, V_max]`` entry per collection, where
        ``values`` is an array of per-trajectory estimates and ``V_min`` /
        ``V_max`` are the smallest / largest single-step reward in that
        collection divided by ``1 - gamma``.
    """
    V_seeds = []
    for trajs in trajs_train_resp:
        # Flatten first so trajectories of unequal length are supported.
        all_rewards = [step[2] for traj in trajs for step in traj]
        V_max = np.max(all_rewards) / (1 - gamma)
        V_min = np.min(all_rewards) / (1 - gamma)

        V_trajs = [
            _step_IS_traj_value(traj, gamma, pi_behva, pi1) for traj in trajs
        ]
        V_seeds.append([np.array(V_trajs), V_min, V_max])
    return V_seeds