-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathPRF_Score.py
92 lines (73 loc) · 2.05 KB
/
PRF_Score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#-*- coding: utf-8 -*-
from __future__ import division

import io
import sys
# import pdb
# Diagnostic printed when a test sentence cannot be aligned with its gold
# sentence.  Kept as a single string; the printed output is the same as the
# two consecutive prints it replaces.
MISMATCH_MESSAGE = (
    "\nIt is the user's responsibility that a sentence in <test file> must "
    "have a SAME LENGTH with its corresponding sentence in <gold file>.\n"
)


def read_sentences(path):
    """Read a UTF-8 text file into a list of sentences.

    Every line of the file is one sentence and is split on whitespace into a
    list of words.  A blank line yields an empty list, so line positions in
    the two input files stay in step.

    Args:
        path: Path of the file to read.

    Returns:
        A list holding one list of words per line of the file.
    """
    # io.open decodes once at the I/O boundary, behaves identically on
    # Python 2 and 3, and the ``with`` block guarantees the handle is closed.
    with io.open(path, encoding="utf-8") as handle:
        return [line.split() for line in handle]


def count_correct(gold_sent, test_sent):
    """Count the words of ``test_sent`` that line up with equal gold words.

    Both sentences are walked in lockstep.  A pair of equal words scores one
    point.  When the two words differ, the side covering fewer characters is
    extended with its following words until both sides cover the same number
    of characters; that whole stretch scores nothing and the walk resumes
    after it.

    Args:
        gold_sent: List of words of the reference sentence.
        test_sent: List of words of the sentence being scored.

    Returns:
        The number of correctly reproduced words.

    Raises:
        IndexError: If one sentence runs out of words while realigning,
            which happens when the two sentences do not cover the same
            characters.
    """
    correct = 0
    gold_pos = 0
    test_pos = 0
    gold_len = len(gold_sent)
    test_len = len(test_sent)
    while gold_pos < gold_len and test_pos < test_len:
        gold_word = gold_sent[gold_pos]
        test_word = test_sent[test_pos]
        if gold_word == test_word:
            correct += 1
        else:
            gold_chars = len(gold_word)
            test_chars = len(test_word)
            # Re-synchronise: pull in more words on whichever side is
            # currently behind until both cover the same character count.
            while gold_chars != test_chars:
                if gold_chars < test_chars:
                    gold_pos += 1
                    gold_chars += len(gold_sent[gold_pos])
                else:
                    test_pos += 1
                    test_chars += len(test_sent[test_pos])
        gold_pos += 1
        test_pos += 1
    return correct


def score(gold_sents, test_sents):
    """Tally word counts over two parallel lists of sentences.

    Args:
        gold_sents: Reference sentences, each a list of words.
        test_sents: Sentences to score, each a list of words.  Sentences
            beyond the number of gold sentences are ignored.

    Returns:
        A tuple ``(correct, gold_total, test_total)``: correctly reproduced
        words, words in all gold sentences, and words in the test sentences
        that have a gold counterpart.

    Raises:
        IndexError: If there are fewer test sentences than gold sentences,
            or if a sentence pair cannot be aligned (the 1-based line number
            and a hint are printed before the error propagates).
    """
    if len(test_sents) < len(gold_sents):
        # Fail up front with an explanation instead of an anonymous
        # "list index out of range" part-way through scoring.
        raise IndexError(
            "test input has %d sentence(s) but gold input has %d"
            % (len(test_sents), len(gold_sents))
        )

    gold_total = sum(len(sent) for sent in gold_sents)
    correct = 0
    test_total = 0
    for line_no, (gold_sent, test_sent) in enumerate(
            zip(gold_sents, test_sents), 1):
        try:
            correct += count_correct(gold_sent, test_sent)
        except IndexError:
            print("Line: %d" % line_no)
            print(MISMATCH_MESSAGE)
            raise
        test_total += len(test_sent)
    return correct, gold_total, test_total


def compute_prf(correct, gold_total, test_total):
    """Derive precision, recall, F-value and error rate from word counts.

    Any ratio whose denominator is zero is reported as ``0.0`` rather than
    raising ``ZeroDivisionError``.

    Args:
        correct: Number of correctly reproduced words.
        gold_total: Number of words in the gold data.
        test_total: Number of words in the test data.

    Returns:
        A tuple ``(precision, recall, f_value, error_rate)`` of floats.
    """
    # True division is required here; on Python 2 it comes from the file's
    # ``from __future__ import division``.
    precision = correct / test_total if test_total else 0.0
    recall = correct / gold_total if gold_total else 0.0
    pr_sum = precision + recall
    f_value = 2 * precision * recall / pr_sum if pr_sum else 0.0
    error_rate = (test_total - correct) / gold_total if gold_total else 0.0
    return precision, recall, f_value, error_rate


def main(argv):
    """Score a test file against a gold file and print the report.

    Args:
        argv: Command-line vector; ``argv[1]`` is the test file and
            ``argv[2]`` the gold file.  Further arguments are ignored.

    Returns:
        Process exit status: 0 on success, 2 when arguments are missing.
    """
    if len(argv) < 3:
        sys.stderr.write("Usage: %s <test file> <gold file>\n" % argv[0])
        return 2

    test_sents = read_sentences(argv[1])
    gold_sents = read_sentences(argv[2])

    correct, gold_total, test_total = score(gold_sents, test_sents)
    errors = test_total - correct
    precision, recall, f_value, error_rate = compute_prf(
        correct, gold_total, test_total)

    # Single-argument print(...) calls run unchanged on Python 2 and 3.
    print("Correct words: %d" % correct)
    print("Error words: %d" % errors)
    print("Gold words: %d" % gold_total)
    print("")
    print("precision: %f" % precision)
    print("recall: %f" % recall)
    print("F-Value: %f" % f_value)
    print("error_rate: %f" % error_rate)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))