forked from YangLing0818/buffer-of-thought-llm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_benchmarks.py
67 lines (56 loc) · 2.28 KB
/
run_benchmarks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import json
from bot_pipeline import BoT
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--task_name',type=str,default='gameof24',choices=['gameof24','checkmate','wordsorting'])
parser.add_argument('--api_key',type=str,help='input your api key here')
parser.add_argument('--model_id',type=str,default='gpt-4o',help='Input model id here, if use local model, input the path to the local model')
GameOf24 = """
Let's play a game called 24. You'll be given four integers, and your objective is to use each number only once, combined with any of the four arithmetic operations (addition, subtraction, multiplication, and division) and parentheses, to achieve a total of 24. For example, if the input is 4, 7, 8, and 8, the output could be 7 * 8 - 4 * 8 = 24. You only need to find one feasible solution!
Input:
"""
CheckmateInOne = """
Given a series of chess moves written in Standard Algebraic Notation (SAN), determine the next move that will result in a checkmate.
Input:
"""
WordSorting = """
Sort a list of words alphabetically, placing them in a single line of text separated by spaces.
Input:
"""
if __name__ == "__main__":
args = parser.parse_args()
task = args.task_name
api_key = args.api_key
model_id = args.model_id
benchmark_dict = {
'gameof24':GameOf24,
'checkmate':CheckmateInOne,
'wordsorting':WordSorting
}
path_dict = {
'gameof24':'benchmarks/gameof24.jsonl',
'checkmate':'benchmarks/CheckmateInOne.jsonl',
'wordsorting':'benchmarks/word_sorting.jsonl'
}
buffer_dict = {
'gameof24':0,
'checkmate':1,
'wordsorting':2
}
user_prompt = benchmark_dict[task]
path = path_dict[task]
problem_id = buffer_dict[task]
for line in (open(path)):
input = json.loads(line)['input']
user_input = user_prompt + input
test_bot = BoT(
user_input = user_input,
problem_id= problem_id,
api_key= api_key,
model_id= model_id
)
result = test_bot.bot_run()
tmp = {'input':input,'result':result}
with open(f'test_results/BoT_{task}.jsonl', 'a+', encoding='utf-8') as file:
json_str = json.dumps(tmp)
file.write(json_str + '\n')