# run_vticl.py
import argparse
import copy
import os.path
import random
import numpy as np
import openai
from src.apis import gpt4v
from src.utils import create_dir, write_json, load_json, encode_image
from src.load_dataset import load_hallusionbench_vticl, load_mathvista_vticl, load_vqa_vticl
# Seed both the stdlib and NumPy generators with the same fixed value so that
# random draws made by this script are repeatable between runs.
_SEED = 2023
random.seed(_SEED)
np.random.seed(_SEED)

# API key and endpoint handed to `gpt4v`; empty / None by default.
api_key = ""
base_url = None

# Adjust accordingly based on your current proxy settings.
proxy = "http://127.0.0.1:4780"
def _str2bool(value):
    """Convert a command-line token into a real ``bool``.

    ``argparse`` with ``type=bool`` calls ``bool("False")``, which is ``True``
    because any non-empty string is truthy. This parser interprets the text
    instead.

    Args:
        value: The raw token from the command line (or an actual ``bool``).

    Returns:
        ``True`` or ``False``.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in {"true", "t", "yes", "y", "on", "1"}:
        return True
    # The empty string stays falsy, as it was under ``type=bool``.
    if token in {"false", "f", "no", "n", "off", "0", ""}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def get_args():
    """Parse the command-line options of the script.

    Returns:
        argparse.Namespace with the attributes ``use_proxy`` (bool),
        ``input_modal`` (str), ``test_sample`` (int or None), ``dataset`` (str),
        ``exp_name`` (str) and ``lt`` (str).
    """
    parser = argparse.ArgumentParser()
    # `nargs="?"` + `const=True` lets a bare `--use_proxy` enable the proxy while
    # `--use_proxy True` / `--use_proxy False` keep working (and now mean what
    # they say).
    parser.add_argument("--use_proxy", type=_str2bool, nargs="?", const=True, default=False,
                        help="route requests through the configured proxy (true/false)")
    parser.add_argument("--input_modal", type=str, default="vticl",
                        choices=["vticl"])
    parser.add_argument("--test_sample", type=int, default=None,
                        help="if set, randomly sub-sample this many items per split")
    parser.add_argument("--dataset", type=str, default="vqa",
                        choices=["hallusionbench", "mathvista", "vqa"])
    # NOTE: category / sub_category are intentionally not CLI options; main()
    # iterates over every category of the selected dataset.
    parser.add_argument("--exp_name", type=str, default="public_code_test02",
                        help="automatically resume experiment by matching the same experiment name")
    parser.add_argument("--lt", type=str, default="few_shot", choices=["zero_shot", "few_shot"],
                        help="zero_shot or few_shot")
    return parser.parse_args()
# File extension (lower-cased, without the dot) -> MIME subtype used in the
# base64 data URL that is sent with each image.
_IMAGE_MIME_SUBTYPES = {
    "jpg": "jpeg",
    "jpeg": "jpeg",
    "png": "png",
    "gif": "gif",
    "webp": "webp",
}


def _categories_for(dataset):
    """Return the ``{category: [sub_category, ...]}`` table for *dataset*.

    Datasets without sub-categories use ``[None]`` so the caller can loop
    uniformly.

    Raises:
        NotImplementedError: If *dataset* is not one of the supported names.
    """
    if dataset == "mathvista":
        return {
            'math-targeted-vqa': ['abstract_scene', 'bar_chart', 'function_plot', 'geometry_diagram', 'line_plot',
                                  'puzzle_test', 'scientific_figure', 'scatter_plot', 'synthetic_scene', 'table'],
            'general-vqa': ['abstract_scene', 'bar_chart', 'document_image', 'line_plot', 'map_chart',
                            'medical_image', 'natural_image', 'pie_chart', 'scatter_plot', 'scientific_figure',
                            'synthetic_scene'],
        }
    if dataset == 'hallusionbench':
        return {'all': [None]}
    if dataset == 'vqa':
        return {'counting50': [None], 'yesorno50': [None], 'random50': [None]}
    raise NotImplementedError(f"not support dataset: {dataset}")


def _load_datas(dataset, lt, category, sub_category):
    """Call the loader that matches *dataset* with the arguments it takes.

    Explicit calls replace the former ``eval(f"load_{dataset}_vticl")`` lookup.

    Raises:
        NotImplementedError: If *dataset* is not one of the supported names.
    """
    if dataset == "mathvista":
        return load_mathvista_vticl(lt, category, sub_category)
    if dataset == 'hallusionbench':
        return load_hallusionbench_vticl(lt)
    if dataset == 'vqa':
        return load_vqa_vticl(lt, category)
    raise NotImplementedError(f"not support dataset: {dataset}")


def _build_messages(data):
    """Build the single-turn user message for one sample.

    Each ``(image, text)`` pair from ``data["image_inputs"]`` and
    ``data["text_inputs"]`` is appended in order as an image part followed by a
    text part. ``zip`` stops at the shorter list, as before.

    Raises:
        KeyError: If an image file has an extension missing from
            ``_IMAGE_MIME_SUBTYPES``.
    """
    content = []
    for image_file, text in zip(data.get("image_inputs", []), data.get("text_inputs", [])):
        # Case-insensitive so that e.g. "x.JPG" and "x.jpeg" are accepted too.
        extension = os.path.splitext(image_file)[1].lstrip(".").lower()
        subtype = _IMAGE_MIME_SUBTYPES[extension]
        base64_image = encode_image(image_file)
        content.append({
            "type": "image_url",
            "image_url": f"data:image/{subtype};base64,{base64_image}",
        })
        content.append({"type": "text", "text": text})
    return [{"role": "user", "content": content}]


def _query_with_retries(messages, attempts=3):
    """Call ``gpt4v`` until it returns a truthy output or *attempts* run out.

    Returns:
        The first truthy output, otherwise the last (falsy) value returned;
        ``""`` if *attempts* is not positive.
    """
    output = ""
    for _ in range(attempts):
        output = gpt4v(
            messages=messages,
            temperature=0,
            api_key=api_key,
            base_url=base_url,
        )
        if output:
            break
    return output


def main():
    """Run the selected dataset through ``gpt4v`` and store the predictions.

    For every (category, sub-category) split of the chosen dataset this loads
    the samples, optionally sub-samples them, queries the model and rewrites
    the split's JSON result file after each sample. Samples whose id is already
    present in an existing result file are skipped, which is how a run with the
    same ``--exp_name`` resumes.
    """
    args = get_args()

    # Output folder.
    result_dir = f"result/vticl/{args.lt}"
    create_dir(result_dir)

    category = _categories_for(args.dataset)

    # The proxy settings do not depend on the split, so apply them once.
    if args.use_proxy:
        openai.proxy = proxy
        os.environ["ALL_PROXY"] = proxy

    for cate, sub_cates in category.items():
        for sub_cate in sub_cates:
            result_file = f"{result_dir}/{args.exp_name}-{cate}-{sub_cate}-{args.lt}-{args.input_modal}.json"

            # Dataset.
            datas = _load_datas(args.dataset, args.lt, cate, sub_cate)
            if args.test_sample:
                # Clamp k: random.sample raises ValueError when k > len(datas).
                datas = random.sample(datas, k=min(args.test_sample, len(datas)))

            results = load_json(result_file) if os.path.exists(result_file) else {}

            for data in datas:
                # JSON object keys are always strings, so normalise the id to
                # keep the resume check working should a loader yield a
                # non-string pid.
                id_ = str(data.get("pid", ""))
                if id_ in results:
                    continue

                messages = _build_messages(data)

                # NOTE(review): "text" / "image_file" are only used for logging
                # here; presumably the loader already places the test query at
                # the end of image_inputs / text_inputs - confirm.
                test_text = data.get("text", "")
                test_file = data.get("image_file")

                output = _query_with_retries(messages, attempts=3)

                print("\n")
                print(f"ID: {id_}")
                print(f"user: {test_file} {test_text}")
                print(f"chatgpt: {output}")
                print("-" * 20)

                temp_data = copy.deepcopy(data)
                temp_data["prediction"] = output
                results[id_] = temp_data
                # Persist after every sample so an interrupted run can resume.
                write_json(results, result_file)
# Script entry point: run the experiment only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()