nextml · JoeHolt · Oct 16, 2018 · stsievert · Oct 17, 2018 · JoeHolt
diff --git a/apps/CardinalBanditsPureExploration/algs/EpsilonGreedy.py b/apps/CardinalBanditsPureExploration/algs/EpsilonGreedy.py
@@ -0,0 +1,113 @@
+"""
+EpsilonGreedy algorithm; implements CardinalBanditsPureExplorationPrototype
+author: Kevin Jamieson
+last updated: 11/13/2015
+"""
+
+import numpy
+import numpy.random
+import next.utils as utils
+
+class MyAlg:
+  """Epsilon-greedy arm selection backed by the butler key/value store."""
+
+  def initExp(self,butler,n,R,failure_probability,params={}):
+    """Store the experiment settings and zeroed per-arm statistics for n arms."""
+    butler.algorithms.set(key='n', value=n)
+    butler.algorithms.set(key='delta', value=failure_probability)
+    butler.algorithms.set(key='R', value=R)
+    butler.algorithms.set(key='pT', value=0)  # number of queries served so far
+    # One fresh list per key so the three statistics never alias each other.
+    for stat_key in ('Xsum', 'X2sum', 'T'):
+      butler.algorithms.set(key=stat_key, value=numpy.zeros(n).tolist())
+    # Start from a uniformly random ordering of the arms.
+    priority_list = numpy.random.permutation(n).tolist()
+    butler.algorithms.set(key='priority_list',value=priority_list)
+    return True
+
+  def getQuery(self,butler,participant_uid):
+    """Return the index of the arm to show next.
+
+    After t earlier queries, the head of priority_list is returned with
+    probability 1 - 1/(t+1); otherwise an arm is drawn uniformly at random.
+    """
+    # Fetch priority_list (incremented by 0) while bumping priority_list_cnt by 1.
+    kv_dict = butler.algorithms.increment_many(key_value_dict={'priority_list':0,'priority_list_cnt':1})
+    priority_list = kv_dict['priority_list']
+
+    t = butler.algorithms.get()['pT']
+    # Float division: integer division would collapse 1/(t+1) to 0 or 1.
+    pt = 1 - (1.0 / (t + 1))
+    if numpy.random.rand() < pt:
+      index = priority_list[0]
+    else:
+      # int() turns the numpy scalar from choice() into a plain Python int.
+      index = int(numpy.random.choice(priority_list))
+
+    # Persist the advanced counter under the same key that was read ('pT').
+    butler.algorithms.set(key='pT', value=t + 1)
+    return index
+
+  def processAnswer(self,butler,target_id,target_reward):
+    """Queue the (arm, reward) pair; occasionally schedule a statistics update."""
+    butler.algorithms.append(key='S',value=(target_id,target_reward))
+
+    if numpy.random.rand()<.1: # occurs about 1/10 of trials
+      butler.job('update_priority_list', {},time_limit=5)
+
+    return True
+
+  def getModel(self,butler):
+    """Return (means, precisions, pull counts) as lists indexed by arm.
+
+    Arms with no recorded answers report -1 for both mean and precision.
+    """
+    key_value_dict = butler.algorithms.get()
+    R = key_value_dict['R']
+    n = key_value_dict['n']
+    sumX = key_value_dict['Xsum']
+    sumX2 = key_value_dict['X2sum']
+    T = key_value_dict['T']
+
+    mu = numpy.zeros(n)
+    prec = numpy.zeros(n)
+    for i in range(n):
+      if T[i]==0:
+        mu[i] = -1
+        prec[i] = -1
+      elif T[i]==1:
+        mu[i] = float(sumX[i]) / T[i]
+        prec[i] = R  # with a single sample, R is reported as the precision
+      else:
+        mu[i] = float(sumX[i]) / T[i]
+        # Standard error of the mean, with the sum of squared deviations floored at 1.
+        prec[i] = numpy.sqrt( float( max(1.,sumX2[i] - T[i]*mu[i]*mu[i]) ) / ( T[i] - 1. ) / T[i] )
+
+    return mu.tolist(),prec.tolist(), T
+
+  def update_priority_list(self,butler,args):
+    """Fold queued answers into the per-arm sums and re-rank arms by mean."""
+    S = butler.algorithms.get_and_delete(key='S')
+    if S is None:
+      return  # nothing queued since the last update
+
+    doc = butler.algorithms.get()
+    n = doc['n']
+    Xsum = doc['Xsum']
+    X2sum = doc['X2sum']
+    T = doc['T']
+
+    for q in S:  # q is (target_id, target_reward)
+      Xsum[q[0]] += q[1]
+      X2sum[q[0]] += q[1]*q[1]
+      T[q[0]] += 1
+
+    # Unsampled arms get an infinite mean so they are ranked ahead of the rest.
+    mu = numpy.zeros(n)
+    for i in range(n):
+      mu[i] = float('inf') if T[i]==0 else Xsum[i] / T[i]
+
+    # Sort by descending mean; the random key only breaks ties.
+    priority_list = numpy.lexsort((numpy.random.randn(n), -mu)).tolist()
+
+    butler.algorithms.set_many(key_value_dict={'priority_list':priority_list,'priority_list_cnt':0,'Xsum':Xsum,'X2sum':X2sum,'T':T})
diff --git a/apps/CardinalBanditsPureExploration/myApp.yaml b/apps/CardinalBanditsPureExploration/myApp.yaml
@@ -6,7 +6,7 @@ initExp:
     args:
       values:
         failure_probability:
-          description: The maximum probability of an algorithm failing to meet its requirements (e.g. correctness)  
+          description: The maximum probability of an algorithm failing to meet its requirements (e.g. correctness)
           type: num
         rating_scale:
           description: A set of ratings that are presented to the user on the query page.
@@ -30,7 +30,7 @@ initExp:
             values:
               alg_id:
                 description: Supported algorithm types for DuelingBanditsPureExploration.
-                values: [RoundRobin,LilUCB, KLUCB]
+                values: [RoundRobin,LilUCB, KLUCB, EpsilonGreedy]
 
         instructions:
           default: Please select, using your mouse or arrow keys, the better rating.
@@ -42,22 +42,28 @@ initExp:
           default: 0.5
           optional: true
 
+        pT:
+          description: tracks time of EpsilonGreedy alg.
+          type: num
+          default: 0
+          optional: true
+
         context:
           description: Specifies the context. In the query page, this appears above the potential ratings.
           type: str
           optional: true
+
         context_type:
           description: Specifies the type of the context. If the type is not text a url to the resource should be provided.
           type: str
           values: [image, text, video]
           optional: true
 
-          
+
 processAnswer:
   args:
     args:
       values:
         target_reward:
           description: The reward of the target. If being used through the query page, this should align with the rating scale specified in initExp. Otherwise can be any numerical value.
           type: num
-