-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgen_scores.py
96 lines (82 loc) · 4.88 KB
/
gen_scores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# python3 gen_scores.py model/my_model/ preprocessing/data/subset-1/train-subset-1.json 1000 8500
# first number is frequency, second number is start
from constants import *
import os
from pathlib import Path
from produce_answers import load_embeddings_index, run_evaluation
import subprocess
import sys
import torch as th
DEFAULT_FREQ = 200 # Evals every 200 global steps.
DEFAULT_START = DEFAULT_FREQ
ENABLE_AUTOMATIC_PLOT_GEN = True
TEMP_JSON_FILENAME = "_temp_gen_scores.json" # Doesn't matter, just didn't want it to clash with other people's temp files.
def custom_print(x, flush=False):
print("[gen_scores] ",end='',flush=flush)
print(x)
def gen_predictions(model_path, dataset_path, glove):
tokenized_dataset_path = ".".join(dataset_path.split(".")[:-1])+"-tokenized.json"
custom_print("Calling produce_answers.run_evaluation()...")
run_evaluation(str(model_path), tokenized_dataset_path, TEMP_JSON_FILENAME, glove)
def run_eval(dataset_path):
cmd = ["python3", "evaluate-v2.0.py", dataset_path, TEMP_JSON_FILENAME]
custom_print("Calling subprocess '%s'..." % " ".join(list(map(str,cmd))))
p = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, universal_newlines=True)
outp = p.stdout
em_score = outp.split('"exact": ')[1].split(",")[0]
f1_score = outp.split('"f1": ')[1].split(",")[0]
total = outp.split('"total": ')[1].split(",")[0]
HasAns_exact = None if not ("HasAns_exact") in outp else outp.split('"HasAns_exact": ')[1].split(",")[0]
HasAns_f1 = None if not ("HasAns_f1") in outp else outp.split('"HasAns_f1": ')[1].split(",")[0]
HasAns_total = None if not ("HasAns_total") in outp else outp.split('"HasAns_total": ')[1].split(",")[0]
NoAns_exact = None if not ("NoAns_exact") in outp else outp.split('"NoAns_exact": ')[1].split(",")[0]
NoAns_f1 = None if not ("NoAns_f1") in outp else outp.split('"NoAns_f1": ')[1].split(",")[0]
NoAns_total = None if not ("NoAns_total") in outp else outp.split('"NoAns_total": ')[1].split(",")[0]
return em_score, f1_score, total, HasAns_exact, HasAns_f1, HasAns_total, NoAns_exact, NoAns_f1, NoAns_total
def main():
if len(sys.argv) not in [3,4,5]:
custom_print("Usage example: \npython3 gen_scores.py ./model/2020-04-07_00-10-37\[LR1-00e-03_Q86821_B64_H200_RS1\]/ preprocessing/data/subset-4/train-subset-4.json")
custom_print("Can add optional FREQ parameter at the end (integer regulating frequency of evaluation measured in global steps).")
return
model_dir = sys.argv[1]
if model_dir[-1] != '/':
model_dir += '/'
dataset_path = sys.argv[2]
freq = DEFAULT_FREQ if len(sys.argv)<=3 else int(sys.argv[3])
start = DEFAULT_START if len(sys.argv)<=4 else int(sys.argv[4])
scores_filename = "scores_"+dataset_path.split("/")[-1].split(".")[0]+".log"
scores_path = model_dir+scores_filename
next_global_step_to_eval = start
# if os.path.exists(scores_path):
# custom_print("%s already exists. Do you want to overwrite it? [y/n] + Enter" % scores_filename)
# user_ans = str(input())
# if "y" not in user_ans:
# return
with open(scores_path, "a") as scores_file:
model_paths = sorted(Path(model_dir).iterdir(), key=os.path.getmtime)
model_paths = [p for p in model_paths if ".par" in str(p)]
glove = load_embeddings_index() # Do this once. Takes a few minutes!
for i, model_path in enumerate(model_paths):
assert(".par" in str(model_path))
global_step = th.load(model_path)[SERIALISATION_KEY_GLOBAL_STEP]
if global_step >= next_global_step_to_eval:
next_global_step_to_eval += freq
custom_print("Evaluating model: '%s' ..." % model_path,flush=True)
gen_predictions(model_path, dataset_path, glove)
em_score, f1_score, total, HasAns_exact, HasAns_f1, HasAns_total, NoAns_exact, NoAns_f1, NoAns_total = run_eval(dataset_path)
scores_file.write("\n")
scores_file.write(str(global_step) + "," + str(em_score) + "," + str(f1_score) + "," + str(total))
if HasAns_exact is not None:
scores_file.write("," + str(HasAns_exact) + "," + str(HasAns_f1) + "," + str(HasAns_total))
if NoAns_exact is not None:
scores_file.write("," + str(NoAns_exact) + "," + str(NoAns_f1) + "," + str(NoAns_total))
print("gen_scores finished. Log written to:", scores_path)
if ENABLE_AUTOMATIC_PLOT_GEN:
cmd = ["python3", "plot_f1_vs_loss.py", scores_path]
custom_print("Calling subprocess '%s'..." % " ".join(list(map(str,cmd))))
subprocess.run(cmd, check=True)
cmd = ["python3", "plot_f1_vs_loss.py", scores_path, "f1"]
custom_print("Calling subprocess '%s'..." % " ".join(list(map(str,cmd))))
subprocess.run(cmd, check=True)
if __name__ == "__main__":
main()