-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgames.py
329 lines (303 loc) · 13.2 KB
/
games.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
import random
import re
import json
import numpy as np
class Ultimatum:
    """Two-player ultimatum game driven by prompt/response players.

    ``player1`` proposes how many of ``game_kwargs['pool']`` dollars to offer
    and ``player2`` accepts or rejects that offer.  Both player objects must
    provide ``update(**kwargs)``, a ``sys_prompt`` attribute and
    ``decide(prompt)`` returning the reply text.  System prompts, user
    prompts, raw replies and retry counts are collected in ``self.log``.
    """

    def __init__(self, player1, player2, **game_kwargs):
        self.game_kwargs = game_kwargs
        self.player1 = player1
        self.player2 = player2
        self.log = {}

    def set_param(self, r_kwargs):
        """Overwrite game parameters that already exist; ignore unknown keys."""
        for k, v in r_kwargs.items():
            if k in self.game_kwargs:
                self.game_kwargs[k] = v

    def play(self, max_attempts=10):
        """Run one round and return the log dictionary.

        Each player is queried up to ``max_attempts`` times until its reply
        can be parsed.

        Raises:
            ValueError: if no parsable offer (proposer) or decision
                (responder) is obtained within ``max_attempts`` tries.
        """
        self.player1.update(player_id='player1', **self.game_kwargs)
        self.player2.update(player_id='player2', **self.game_kwargs)

        print("SYSTEM PROMPT")
        print(self.player1.sys_prompt)
        self.log['player1-sys'] = self.player1.sys_prompt
        offer = self._ask(
            self.player1, 'player1', self.write_player1_prompt(),
            "PROPOSER RESPONSE", self.get_offer,
            "Failed to obtain a valid offer", max_attempts)

        print("SYSTEM PROMPT")
        print(self.player2.sys_prompt)
        self.log['player2-sys'] = self.player2.sys_prompt
        self._ask(
            self.player2, 'player2', self.write_player2_prompt(offer),
            "RESPONDER RESPONSE", self.get_decision,
            "Failed to obtain a valid decision", max_attempts)
        return self.log

    def _ask(self, player, key, prompt, banner, parse, error, max_attempts):
        """Query ``player`` until ``parse`` yields a value; log under ``key``."""
        self.log[key + '-retries'] = 0
        for attempt in range(1, max_attempts + 1):
            self.log[key + '-prompt'] = prompt
            print("USER PROMPT")
            print(prompt)
            reply = player.decide(prompt)
            self.log[key + '-res'] = reply
            print(banner)
            print(reply)
            parsed = parse(reply)
            if parsed is not None:
                return parsed
            # Only failures that are followed by another try count as retries.
            if attempt < max_attempts:
                self.log[key + '-retries'] += 1
        raise ValueError(error)

    def write_player1_prompt(self):
        """Build the proposer prompt from the pool size."""
        player1_prompt = ("You have %d dollars. "
                          "How much do you offer?") % self.game_kwargs['pool']
        return player1_prompt

    def write_player2_prompt(self, offer):
        """Build the responder prompt from the pool size and the offer."""
        player2_prompt = ("The other player has %d dollars. "
                          "They offer you %d dollars. "
                          "Do you accept or reject the offer?") % (self.game_kwargs['pool'], offer)
        return player2_prompt

    def get_offer(self, response):
        """Extract the whole-dollar amount following ``Offer:``.

        Returns the amount as an ``int``, or ``None`` when the reply is not
        text, has no ``Offer:`` field, or names a fractional amount.
        A zero-only fraction such as ``5.00`` is accepted as ``5``.
        """
        if not isinstance(response, str):
            return None
        pattern = r"Offer:\s*[\$]?([0-9]+(?:\.[0-9]+)?)"
        match = re.search(pattern, response, re.IGNORECASE)
        if match is None:
            return None
        whole, _, fraction = match.group(1).partition('.')
        if fraction.strip('0'):
            # A real fractional part (e.g. "5.5") is not a valid offer.
            return None
        return int(whole)

    def get_decision(self, response):
        """Extract the responder's choice following ``Decision:``.

        Returns ``'accept'`` or ``'reject'`` (always lowercase), or ``None``
        when the reply is not text or the choice is missing or ambiguous.
        """
        if not isinstance(response, str):
            return None
        pattern = r"Decision:\s*(accept|reject)"
        match = re.search(pattern, response, re.IGNORECASE)
        if match:
            return match.group(1).lower()
        # Fallback: the keyword appears later in the text that follows the
        # first "Decision:" marker; accept it only if it is unambiguous.
        parts = response.split("Decision:")
        if len(parts) < 2:
            return None
        tail = parts[1]
        has_accept = 'accept' in tail
        has_reject = 'reject' in tail
        if has_accept and not has_reject:
            return 'accept'
        if has_reject and not has_accept:
            return 'reject'
        return None

    def save(self, file_name):
        """Write the log to ``file_name`` as JSON."""
        with open(file_name, 'w', encoding='utf-8') as f:
            json.dump(self.log, f)
class Gambling:
    """Single-player risky-choice task built from a list of priced options.

    The player object must provide ``update(**kwargs)``, ``sys_prompt``,
    ``model_type`` and ``decide(prompt)`` returning the reply text.
    ``game_kwargs`` must contain ``ablate``; ``play`` additionally reads
    ``question`` and ``mixed``.  ``question`` is indexed as
    ``(amount_a, prob_a, amount_b, prob_b)`` when ``mixed`` is ``False`` and
    as ``(amount, prob)`` otherwise.
    """

    # Number of alternatives listed in each prompt; get_choice requires
    # exactly this many accept/reject answers.
    NUM_OPTIONS = 7

    def __init__(self, player, **game_kwargs):
        self.game_kwargs = game_kwargs
        self.player1 = player
        self.ablate = game_kwargs['ablate']
        self.log = {}

    def set_param(self, r_kwargs):
        """Overwrite game parameters that already exist; ignore unknown keys."""
        for k, v in r_kwargs.items():
            if k in self.game_kwargs:
                self.game_kwargs[k] = v
                if k == 'ablate':
                    # Keep the cached flag in step with game_kwargs.
                    self.ablate = v

    def play(self, max_attempts=1):
        """Build the prompt, query the player and return the log dictionary.

        The parsed bounds are only printed.  Parse failures are neither
        retried selectively nor raised: the player is queried exactly
        ``max_attempts`` times and the log keeps the last prompt/response.
        """
        self.player1.update(**self.game_kwargs)
        self.question = self.game_kwargs['question']
        print("SYSTEM PROMPT")
        print(self.player1.sys_prompt)
        self.log['player-sys'] = self.player1.sys_prompt

        if self.game_kwargs['mixed'] is False:
            expected_val = (self.question[1] * self.question[0]) + (self.question[3] * self.question[2])
            # Sure outcomes are spread between the two prospect amounts.
            sure_outcomes = self._spaced_values(self.question[0], self.question[2])
            sure_outcomes.sort(reverse=True)
            player_prompt = self.write_player_prompt(expected_val, sure_outcomes)
        else:
            # Magnitude of the second outcome at which the gamble's expected
            # value is zero; candidates span 75%..175% of it.
            breakeven = (self.question[0] * self.question[1]) / (1 - self.question[1])
            min_gamble = breakeven - (breakeven * 0.25)
            max_gamble = breakeven + (breakeven * 0.75)
            gambles = [-1 * value for value in self._spaced_values(min_gamble, max_gamble)]
            gambles.sort(reverse=True)
            player_prompt = self.write_mixed_player_prompt(gambles)

        for _ in range(max_attempts):
            self.log['player-prompt'] = player_prompt
            print("USER PROMPT")
            print(player_prompt)
            player_res = self.player1.decide(player_prompt)
            self.log['player-res'] = player_res
            print("RESPONSE")
            print(player_res)
            lowest, highest = self.get_choice(player_prompt, player_res)
            print("LOWEST")
            print(lowest)
            print("HIGHEST")
            print(highest)
        return self.log

    def _spaced_values(self, low, high):
        """Return NUM_OPTIONS values from ``low`` to ``high``, rounded to cents."""
        # logspace(0, 1, n) runs from 10**0 = 1 to 10**1 = 10, so subtracting 1
        # and dividing by 9 maps the points onto [0, 1] with log spacing.
        log_spaced_points = np.logspace(0, 1, self.NUM_OPTIONS, endpoint=True)
        scaled_points = low + (high - low) * (log_spaced_points - 1) / (10 - 1)
        return [round(value, 2) for value in scaled_points]

    def write_player_prompt(self, expected_val, sure_outcomes):
        """Build the prompt listing the prospect and the sure alternatives.

        ``sure_outcomes`` is left untouched; when ``self.ablate`` is set a
        shuffled copy is listed instead.
        """
        # round() before %d: %d truncates, and e.g. 0.29 * 100 is
        # 28.999999999999996, which would otherwise be shown as 28%.
        player_prompt = (
            "The prospect is %.2f dollars with %d%% probability and %.2f dollars with %d%% probability. "
            "The expected value of the prospect is %.2f dollars.\n" %
            (self.question[0], round(self.question[1] * 100),
             self.question[2], round(self.question[3] * 100),
             expected_val))
        player_prompt += "Below are the alternative sure outcomes.\n"
        listed = list(sure_outcomes)
        if self.ablate:
            random.shuffle(listed)
        for outcome in listed:
            player_prompt = player_prompt + ("%.2f dollars with 100%% probability\n" % outcome)
        if self.player1.model_type == 'base':
            player_prompt = player_prompt + "\nI choose: "
        return player_prompt

    def write_mixed_player_prompt(self, gambles):
        """Build the prompt listing the prospect and the candidate gambles."""
        prospect_pct = round(self.question[1] * 100)
        gamble_pct = round((1 - self.question[1]) * 100)
        player_prompt = ("The prospect is %.2f dollars with %d%% probability.") % (
            self.question[0], prospect_pct)
        player_prompt += "\nBelow are the gambles.\n"
        for gamble in gambles:
            expected_val = (gamble * (1 - self.question[1])) + (self.question[0] * self.question[1])
            player_prompt = player_prompt + ("%.2f dollars with %d%% probability. The expected value is"
                                             " %.2f dollars.\n") % (
                gamble, gamble_pct, expected_val)
        if self.player1.model_type == 'base':
            player_prompt = player_prompt + "I choose: "
        return player_prompt

    def get_choice(self, prompt, text):
        """Parse per-option accept/reject answers from ``text``.

        Option amounts are the first token of every prompt line from the
        third line on.  A reply line ``<label>: <answer>`` is considered when
        its label contains ``100%``, contains ``sure option`` (then mapped
        positionally onto the prompt's amounts), or starts with one of the
        amounts.

        Returns:
            ``(lowest, highest)``: the smallest accepted and the largest
            rejected amount.  ``(None, None)`` unless exactly ``NUM_OPTIONS``
            answers were parsed; with a full set, a missing side becomes
            ``-inf`` (nothing accepted) or ``inf`` (nothing rejected).
        """
        lowest, highest = None, None
        count = 0
        strip_chars = str.maketrans('', '', '{}$()')
        try:
            amts = [p.split(' ')[0] for p in prompt.rstrip().split('\n')[2:]]
            sure_options = {i + 1: a for i, a in enumerate(amts)}
            option_idx = 1
            for res in text.split('\n'):
                fields = res.split(':')
                if len(fields) == 1:
                    continue
                label = fields[0]
                clean_res = fields[1].strip().replace('{', '').replace('}', '').lower()
                clean_num = label.strip().translate(strip_chars).split(' ')[0]
                # Non-ASCII runs (e.g. a typographic minus) become '-'.
                clean_num = re.sub(r'[^\x00-\x7F]+', '-', clean_num)
                is_sure_option = 'sure option' in label.lower()
                if '100%' not in label and not is_sure_option and clean_num not in amts:
                    continue
                print(label.lower())
                if is_sure_option:
                    clean_num = sure_options[option_idx]
                    option_idx += 1
                if clean_res == 'reject':
                    new_highest = float(clean_num)
                    count += 1
                    highest = new_highest if highest is None else max(highest, new_highest)
                elif clean_res == 'accept':
                    new_lowest = float(clean_num)
                    count += 1
                    lowest = new_lowest if lowest is None else min(lowest, new_lowest)
        except Exception:
            # Best-effort parsing: any malformed reply counts as a failure.
            print("Exception (likely parsing)...")
            count = 0
            lowest = None
            highest = None
        if count != self.NUM_OPTIONS:
            print("Fail: count does not equal 7")
            lowest = None
            highest = None
        else:
            if lowest is None:
                lowest = float('-inf')
            if highest is None:
                highest = float('inf')
        return lowest, highest

    def save(self, file_name):
        """Write the log to ``file_name`` as JSON."""
        with open(file_name, 'w', encoding='utf-8') as f:
            json.dump(self.log, f)
class Wait:
    """Single-player two-alternative choice task (options labelled A and B).

    The player object must provide ``update(**kwargs)``, ``sys_prompt`` and
    ``decide(prompt)`` returning the reply text.  ``game_kwargs`` must
    contain ``ablate``.  The question is a pair of option texts, taken from
    ``game_kwargs['question']`` if given at construction or supplied later
    through ``set_param``.
    """

    def __init__(self, player, **game_kwargs):
        self.game_kwargs = game_kwargs
        self.ablate = game_kwargs['ablate']
        # Seed from game_kwargs so play() works without a prior set_param().
        self.question = game_kwargs.get('question')
        self.player1 = player
        self.log = {}

    def set_param(self, r_kwargs):
        """Update existing game parameters and the current question.

        Keys absent from ``game_kwargs`` are ignored, except ``question``,
        which is always stored on the instance.
        """
        for k, v in r_kwargs.items():
            if k in self.game_kwargs:
                self.game_kwargs[k] = v
                if k == 'ablate':
                    # Keep the cached flag in step with game_kwargs.
                    self.ablate = v
            if k == 'question':
                self.question = v

    def play(self, max_attempts=20):
        """Ask the question until the reply parses; return the log dictionary.

        When ``self.ablate`` is set the two options are shuffled before the
        prompt is built.

        Raises:
            ValueError: if no question has been set, or if no valid answer
                is obtained within ``max_attempts`` tries.
        """
        if self.question is None:
            raise ValueError("No question set")
        self.player1.update(**self.game_kwargs)
        print("SYSTEM PROMPT")
        print(self.player1.sys_prompt)
        self.log['player-sys'] = self.player1.sys_prompt

        question_ans = [self.question[0], self.question[1]]
        if self.ablate:
            random.shuffle(question_ans)
        player_prompt = self.write_player_prompt(question_ans)

        for attempt in range(1, max_attempts + 1):
            self.log['player-prompt'] = player_prompt
            print("USER PROMPT")
            print(player_prompt)
            player_res = self.player1.decide(player_prompt)
            self.log['player-res'] = player_res
            print("RESPONSE")
            print(player_res)
            if self.get_choice(player_prompt, player_res) is not None:
                break
            if attempt == max_attempts:
                raise ValueError("Failed to obtain a valid answer")
        return self.log

    def write_player_prompt(self, question_ans):
        """Build the A/B prompt from the two option texts."""
        player_prompt = ("You can either choose:\n"
                         "A. %s\n"
                         "B. %s") % (question_ans[0], question_ans[1])
        return player_prompt

    def get_choice(self, prompt, text):
        """Return the text after ``Answer:`` if it starts with a valid option.

        Valid options are the second and third prompt lines, with or without
        their leading ``A. `` / ``B. `` label.  Returns ``None`` when there
        is no ``Answer:`` field or it matches neither option.
        """
        prompt_lines = prompt.split('\n')
        first = prompt_lines[1]
        second = prompt_lines[2]
        options = (
            first,
            second,
            self._strip_label(first, 'A. '),
            self._strip_label(second, 'B. '),
        )
        match = re.search(r'Answer:\s*(.+)', text)
        if not match:
            return None
        ans = match.group(1).strip()
        return ans if ans.startswith(options) else None

    @staticmethod
    def _strip_label(line, label):
        """Drop ``label`` from the start of ``line`` only (not from its body)."""
        return line[len(label):] if line.startswith(label) else line

    def save(self, file_name):
        """Write the log to ``file_name`` as JSON."""
        with open(file_name, 'w', encoding='utf-8') as f:
            json.dump(self.log, f)