-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhard_output_pass_at_k.json
49 lines (49 loc) · 1.1 KB
/
hard_output_pass_at_k.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
{
"pass@1": 0.18243243243243243,
"model": "hard_output.jsonl",
"split": "average",
"subset": "hard",
"calibrated": true,
"gt_pass_rate": 0.7364864864864865,
"failed_tasks": [
"BigCodeBench/34",
"BigCodeBench/82",
"BigCodeBench/99",
"BigCodeBench/37",
"BigCodeBench/101",
"BigCodeBench/108",
"BigCodeBench/177",
"BigCodeBench/187",
"BigCodeBench/227",
"BigCodeBench/287",
"BigCodeBench/302",
"BigCodeBench/313",
"BigCodeBench/341",
"BigCodeBench/360",
"BigCodeBench/374",
"BigCodeBench/401",
"BigCodeBench/409",
"BigCodeBench/418",
"BigCodeBench/417",
"BigCodeBench/501",
"BigCodeBench/502",
"BigCodeBench/530",
"BigCodeBench/579",
"BigCodeBench/583",
"BigCodeBench/587",
"BigCodeBench/590",
"BigCodeBench/618",
"BigCodeBench/655",
"BigCodeBench/657",
"BigCodeBench/826",
"BigCodeBench/845",
"BigCodeBench/865",
"BigCodeBench/916",
"BigCodeBench/917",
"BigCodeBench/964",
"BigCodeBench/1003",
"BigCodeBench/1008",
"BigCodeBench/1015",
"BigCodeBench/1019"
]
}