Skip to content

Commit

Permalink
Merge branch 'master' of github.com:aig-upf/pi-IW
Browse files Browse the repository at this point in the history
  • Loading branch information
mjunyentb committed Jul 31, 2020
2 parents cbfd0c6 + ac897ff commit 9bfd15e
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions piIW_alphazero.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,15 @@ def alphazero_planning_step(episode_transitions):


# pi-IW planning step function with given hyperparameters
- def get_pi_iw_planning_step_fn(actor, planner, policy_fn, tree_budget, discount_factor):
+ def get_pi_iw_planning_step_fn(actor, planner, policy_fn, tree_budget, discount_factor, temp):
def pi_iw_planning_step(episode_tranistions):
nodes_before_planning = len(actor.tree)
budget_fn = lambda: len(actor.tree) - nodes_before_planning == tree_budget
planner.plan(tree=actor.tree,
successor_fn=actor.generate_successor,
stop_condition_fn=budget_fn,
policy_fn=policy_fn)
- return softmax_Q_tree_policy(actor.tree, actor.tree.branching_factor, discount_factor, temp=0)
+ return softmax_Q_tree_policy(actor.tree, actor.tree.branching_factor, discount_factor, temp=temp)
return pi_iw_planning_step


Expand Down Expand Up @@ -172,7 +172,7 @@ def report(self, episode_rewards, total_interactions):
discount_factor = 0.99
puct_factor = 0.5 # AlphaZero
first_moves_temp = np.inf # AlphaZero
- policy_temp = 1 # AlphaZero
+ policy_temp = 1 # pi-IW and AlphaZero
cache_subtree = True
batch_size = 32
learning_rate = 0.0007
Expand Down Expand Up @@ -245,7 +245,8 @@ def report(self, episode_rewards, total_interactions):
planner=planner,
policy_fn=network_policy,
tree_budget=tree_budget,
- discount_factor=discount_factor)
+ discount_factor=discount_factor,
+ temp=policy_temp)
learner = SupervisedPolicy(model, optimizer, regularization_factor=regularization_factor, use_graph=True)

# Initialize experience replay: run complete episodes until we exceed both batch_size and dataset_min_transitions
Expand Down

0 comments on commit 9bfd15e

Please sign in to comment.