demo.py
import os
from functools import partial

from ReIFE.evaluator import PairwiseEvaluator
from ReIFE.methods import get_method, get_parser
from ReIFE.models import get_model
from ReIFE.utils import get_dataset_path
from helper import model_info, method_info, parser_info
from meta_eval import pairwise_meta_eval


def demo():
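    """Demo of the ReIFE pairwise-evaluation pipeline: load a Llama-2 judge
    via vLLM, evaluate response pairs on llmbar_natural, and meta-evaluate
    the parsed judgments against the human preference labels."""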
    # Get model info
    model_info("llama2vllm")

    # Load the model
    model_cls = get_model("llama2vllm")
    home_dir = os.getenv("HOME")
    model = model_cls(
        model_pt="meta-llama/Llama-2-7b-chat-hf",
        # below are vLLM-specific args
        tensor_parallel_size=1,
        download_dir=os.path.join(home_dir, ".cache/huggingface/hub"),
        gpu_memory_utilization=0.9,
        quantization=None,
        swap_space=8,
        max_input_len=4000,  # maximum prompt length
        max_model_len=4096,  # maximum total sequence length (prompt + generation)
        dtype="float32",
    )
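    # NOTE: constructing the model class typically starts the vLLM engine
    # and loads the weights onto the GPU, so this step can take a while.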
    # Load the evaluator
    evaluator = PairwiseEvaluator(model)

    # Get method info
    eval_method = "base_pairwise"
    method_info(eval_method)

    # Load the evaluation method
    eval_fn = get_method(eval_method)
    eval_fn = partial(
        eval_fn,
        # in run.py, these arguments are supplied by the config file
        instruction_marker="INSTRUCTION",
        output_marker="OUTPUT",
    )
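    # partial() binds the marker names up front; the remaining arguments
    # are supplied later when the evaluator calls eval_fn.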
    # Get parser info
    parser_info("base_pairwise")

    # Load the parser; in run.py, these arguments are supplied by the config file
    parse_fn = get_parser("base_pairwise")
    parse_fn = partial(
        parse_fn,
        sys1_marker="a",  # marker for system 1
        sys2_marker="b",  # marker for system 2
        pattern=r"Output \((.*?)\)",  # pattern to match the verdict in the output
        verbose=True,
    )
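    # The regex captures the judge's verdict, e.g. "Output (a)" -> "a",
    # which is then matched against sys1_marker/sys2_marker.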
model_name = "llama-2-7b"
prompt_method = "pairwise_vanilla" # it corresponds to the prompt file in /prompts
eval_kwargs = {
"temperature": 0.0,
"top_p": 1.0,
"n": 1, # number of samples
"max_tokens": 64, # max tokens
"logprobs": 32, # top k logprobs to return
}
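    # temperature=0.0 with n=1 gives greedy, effectively deterministic decoding.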
dataset = "llmbar_natural"
dataset_path = get_dataset_path(dataset)
batch_size = 1
# Run the evaluation
evaluator.pairwise_eval(
eval_fn=eval_fn,
input_dir=dataset_path,
output_dir=f"results/{dataset}.{model_name}.{eval_method}.{prompt_method}.jsonl",
prompt_dir=f"prompts/{prompt_method}.txt",
batch_size=batch_size,
output_text_dir=f"results/outputs/{dataset}.{model_name}.{eval_method}.{prompt_method}.txt",
parse_fn=parse_fn,
verbose=True,
**eval_kwargs,
)
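    # Parsed judgments go to the .jsonl file in results/; the raw model
    # outputs are saved separately under results/outputs/ for inspection.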
    # Run the meta-evaluation
    pairwise_meta_eval(
        human_dir=dataset_path,
        model_dir=f"results/{dataset}.{model_name}.{eval_method}.{prompt_method}.jsonl",
        verbose=True,
    )
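    # pairwise_meta_eval scores the parsed model preferences (model_dir)
    # against the human preference labels in the dataset (human_dir).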
if __name__ == "__main__":
demo()