-
Notifications
You must be signed in to change notification settings - Fork 54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added Epsilon Greedy #219
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
""" | ||
EpsilonGreedy app implements CardinalBanditsPureExplorationPrototype | ||
author: Kevin Jamieson | ||
last updated: 11/13/2015 | ||
""" | ||
|
||
import numpy | ||
import numpy.random | ||
import next.utils as utils | ||
|
||
class MyAlg:
    """Epsilon-greedy arm selection for a cardinal-bandits experiment.

    All state is kept in ``butler.algorithms`` under these keys:

    * ``n``, ``R``, ``delta`` -- values passed to ``initExp``.
    * ``pT`` -- number of queries served so far; drives the exploration rate.
    * ``Xsum``, ``X2sum``, ``T`` -- per-arm reward sum, squared-reward sum
      and answer count.
    * ``priority_list`` -- arm indices, best empirical mean first.
    * ``S`` -- answers received since the last priority-list rebuild.
    """

    def initExp(self, butler, n, R, failure_probability, params={}):
        """Store the experiment parameters and zeroed per-arm statistics.

        ``params`` is accepted for interface compatibility and is not read.
        Always returns True.
        """
        butler.algorithms.set(key='n', value=n)
        butler.algorithms.set(key='delta', value=failure_probability)
        butler.algorithms.set(key='R', value=R)
        butler.algorithms.set(key='pT', value=0)

        # Build a fresh list for every key so the three statistics can never
        # alias one another if the store keeps references instead of copies.
        for stat_key in ('Xsum', 'X2sum', 'T'):
            butler.algorithms.set(key=stat_key, value=numpy.zeros(n).tolist())

        # Start from a random ordering of the arms.
        priority_list = numpy.random.permutation(n).tolist()
        butler.algorithms.set(key='priority_list', value=priority_list)

        return True

    def getQuery(self, butler, participant_uid):
        """Return the index of the arm to present next.

        With probability ``1 - 1/(t + 1)`` (``t`` = queries served so far)
        the head of the priority list is returned; otherwise an arm is drawn
        uniformly at random from the list.
        """
        # Fetch the current priority list (incremented by 0, i.e. unchanged)
        # and bump the query counter in the same call.
        kv_dict = butler.algorithms.increment_many(
            key_value_dict={'priority_list': 0, 'priority_list_cnt': 1})
        priority_list = kv_dict['priority_list']

        t = butler.algorithms.get()['pT']

        # 1.0 forces true division; integer division would make this jump
        # straight from 0 to 1 after the first query on Python 2.
        exploit_probability = 1 - 1.0 / (t + 1)
        if numpy.random.rand() < exploit_probability:
            index = priority_list[0]
        else:
            # Cast so both branches return a plain Python int.
            index = int(numpy.random.choice(priority_list))

        # NOTE(review): this read-then-write of 'pT' is not atomic; confirm
        # whether concurrent getQuery calls matter for this counter.
        butler.algorithms.set(key='pT', value=t + 1)

        return index

    def processAnswer(self, butler, target_id, target_reward):
        """Queue an answer and occasionally schedule a priority-list rebuild.

        Always returns True.
        """
        butler.algorithms.append(key='S', value=(target_id, target_reward))

        # Rebuild on roughly one answer in ten.
        if numpy.random.rand() < .1:
            butler.job('update_priority_list', {}, time_limit=5)

        return True

    def getModel(self, butler):
        """Return ``(means, precisions, counts)`` as per-arm lists.

        An arm with no answers reports -1 for both mean and precision; an arm
        with exactly one answer reports ``R`` as its precision.
        """
        key_value_dict = butler.algorithms.get()
        R = key_value_dict['R']
        n = key_value_dict['n']
        sumX = key_value_dict['Xsum']
        sumX2 = key_value_dict['X2sum']
        T = key_value_dict['T']

        mu = numpy.zeros(n)
        prec = numpy.zeros(n)
        for i in range(n):
            if T[i] == 0:
                mu[i] = -1
                prec[i] = -1
            elif T[i] == 1:
                mu[i] = float(sumX[i]) / T[i]
                prec[i] = R
            else:
                mu[i] = float(sumX[i]) / T[i]
                # The sum of squared deviations is floored at 1 before it is
                # scaled by 1/(T-1) and 1/T.
                prec[i] = numpy.sqrt(
                    float(max(1., sumX2[i] - T[i] * mu[i] * mu[i]))
                    / (T[i] - 1.) / T[i])

        return mu.tolist(), prec.tolist(), T

    def update_priority_list(self, butler, args):
        """Fold queued answers into the statistics and re-rank the arms.

        Does nothing when no answers are queued under ``S``. ``args`` is
        not read.
        """
        S = butler.algorithms.get_and_delete(key='S')
        if S is None:
            return

        doc = butler.algorithms.get()
        n = doc['n']
        Xsum = doc['Xsum']
        X2sum = doc['X2sum']
        T = doc['T']

        for target_id, reward in S:
            Xsum[target_id] += reward
            X2sum[target_id] += reward * reward
            T[target_id] += 1

        # Arms without answers get an infinite mean so they sort first.
        mu = numpy.array([
            float('inf') if T[i] == 0 else float(Xsum[i]) / T[i]
            for i in range(n)
        ])

        # Sort by descending empirical mean, breaking ties randomly
        # (lexsort treats its last key as the primary one).
        priority_list = numpy.lexsort((numpy.random.randn(n), -mu)).tolist()

        butler.algorithms.set_many(key_value_dict={
            'priority_list': priority_list,
            'priority_list_cnt': 0,
            'Xsum': Xsum,
            'X2sum': X2sum,
            'T': T,
        })
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ initExp: | |
args: | ||
values: | ||
failure_probability: | ||
description: The maximum probability of an algorithm failing to meet its requirements (e.g. correctness) | ||
description: The maximum probability of an algorithm failing to meet its requirements (e.g. correctness) | ||
type: num | ||
rating_scale: | ||
description: A set of ratings that are presented to the user on the query page. | ||
|
@@ -30,7 +30,7 @@ initExp: | |
values: | ||
alg_id: | ||
description: Supported algorithm types for DuelingBanditsPureExploration. | ||
values: [RoundRobin,LilUCB, KLUCB] | ||
values: [RoundRobin,LilUCB, KLUCB, EpsilonGreedy] | ||
|
||
instructions: | ||
default: Please select, using your mouse or arrow keys, the better rating. | ||
|
@@ -42,22 +42,28 @@ initExp: | |
default: 0.5 | ||
optional: true | ||
|
||
pT: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this required? The YAML file is for algorithm inputs/outputs. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I could not get butler to work/save parameters unless the variable was in the YAML. I am not sure of the 'correct' way to get butler working. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hm... that's not how There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I went back and removed the arg in myApp.yaml and it continued to work as expected. I am not sure what I was doing at the time of writing that... |
||
description: tracks time of EpsilonGreedy alg. | ||
type: num | ||
default: 0 | ||
optional: true | ||
|
||
context: | ||
description: Specifies the context. In the query page, this appears above the potential ratings. | ||
type: str | ||
optional: true | ||
|
||
context_type: | ||
description: Specifies the type of the context. If the type is not text a url to the resource should be provided. | ||
type: str | ||
values: [image, text, video] | ||
optional: true | ||
|
||
|
||
processAnswer: | ||
args: | ||
args: | ||
values: | ||
target_reward: | ||
description: The reward of the target. If being used through the query page, this should align with the rating scale specified in initExp. Otherwise can be any numerical value. | ||
type: num | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think there's a typo here (`pt` vs `pT`). But maybe we could provide a longer name, maybe `answers_received`? That'd be a little easier to maintain.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree, a more descriptive name would be better.