-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbell_machine.py
27 lines (22 loc) · 876 Bytes
/
bell_machine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from bell_test import *
import numpy as np
from kernels import Kernel
from test import data_frame, observed, predicted
# Disabled experiment, kept for reference:
#   reward = Kernel(observed, predicted).gaussian_kernel

# The action space holds a single entry: data_frame[0] with its missing
# values replaced by 0.
first_entry = data_frame[0].fillna(0)
action_space = [first_entry]
print(action_space)

# State and Bellman are not defined in this file; presumably they come from
# the `bell_test` star import -- confirm.
# The first State argument is one integer index per action-space entry.
indices = np.arange(len(action_space))
state = State(indices, 2, action_space)
consecutive_state = state.set_consecutive()
states = [state, consecutive_state]
bell = Bellman(states)
# Repeat a utility estimate for states[0] until two successive estimates
# differ by no more than `error`, printing the difference on every pass and
# the final estimate at the end.
# NOTE(review): indentation was lost in the copy under review. The loop body
# is taken to run through the `previous_utility` update, with the final print
# placed after the loop -- confirm against the original file.
previous_utility = -10000  # starting value for the "previous" estimate
error = 2e-06  # convergence tolerance
diff = 1000000  # starts above `error` so the loop body runs at least once
while diff > error:
    # One list per state: every entry of that state's action space multiplied
    # by states[0].utility.
    # NOTE(review): both lists are scaled by states[0].utility; the second
    # may have been meant to use states[1].utility -- confirm.
    actions = [
        [entry * states[0].utility for entry in source.action_space]
        for source in (states[0], states[1])
    ]

    # sum(iterable, start) adds `start` to the total, so this is the combined
    # total of both lists.
    combined_total = sum(actions[0], sum(actions[1]))

    # NOTE(review): np.argmin is handed a one-element list and returns an
    # index, not a value. Comparing the two per-state sums (and possibly
    # np.min) may have been intended -- confirm before changing.
    consecutive_utility = states[0].reward + 0.5 * np.argmin([combined_total])

    diff = np.abs(consecutive_utility - previous_utility)
    print(diff)
    # NOTE(review): nothing visible here writes the new estimate back into
    # `states` -- confirm whether the loop should update a state's utility.
    previous_utility = consecutive_utility
print(previous_utility)