-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrewrite_proiel_v2.py
95 lines (86 loc) · 2.95 KB
/
rewrite_proiel_v2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# python3
#
# 2016-03-22 bug fix after Skype call
# 2016-06-15 Rewriting the new greek_Haudag.pcases.lemma.lex, SPECIAL VERSION FOR THIS
#
import re
import getopt, sys, os
'''
For PROIEL tags: positions 11 (strength) and 12 (inflection) are dropped.
At position 10, p is rewritten to -.
Possibly later: Ne and Nb become N-; Df, Dq, and Du become D-.
usage: python rewrite_proiel_v2.py -f greek.lex {-e for the "possibly later" rewrites} {-s to simplify, e.g. Nb into N-}
NEW FORMAT:
greek_Haudag.pcases.lemma.lex:
ἀλλήλων ἀλλήλων Pc-p---mg--i 25
ἀλλήλων ἀλλήλων Pc-p---ng--i 4
ἀλληλέων ἀλληλέων Pc-p---fg--i 2
ἀλληλέων ἀλληλέων Pc-p---mg--i 1
ἀλλήλοις ἀλλήλων Pc-p---md--i 5
ἀλλήλοις ἀλλήλων Pc-p---nd--i 2
Output:
python3 rewrite_proiel_v2.py -f greek_Haudag.pcases.lemma.lex
ἀλλήλων ἀλλήλων Pc-p---mg-
ἀλλήλων ἀλλήλων Pc-p---ng-
ἀλληλέων ἀλληλέων Pc-p---fg-
...
2016-06-15: Peters-Mac-Pro:20160615 pberck$
2016-06-15: python3 rewrite_proiel_v2.py -f greek_Haudag.pcases.lemma.lex > greek_Haudag.pcases.lemma.lex.rewrite
2016-06-15: Skipped 10 NOPEs
2016-06-15: Did 0 'eventuele' fixes
'''
def rewrite_tag(tag, eventueel_later=False, simplify=False):
    """Rewrite one PROIEL morphological tag.

    A 12-character tag loses its last two positions (11 and 12); if
    position 10 then holds "p" it is replaced by "-".  Tags of any other
    length skip that step and only receive the optional rewrites.

    Args:
        tag: the tag string, e.g. "Pc-p---mg--i".
        eventueel_later: when true, also rewrite a leading Ne/Nb to N-
            and a leading Df/Dq/Du to D-.
        simplify: when true, replace the second character of the tag
            with "-".

    Returns:
        A ``(new_tag, fixes)`` tuple, where ``fixes`` counts the
        ``eventueel_later`` substitutions that were applied.
    """
    fixes = 0
    if len(tag) == 12:
        tag = tag[:10]  # drop the last two positions
        if tag[9] == "p":
            tag = tag[:9] + "-"
    if eventueel_later:
        if tag[:2] in ("Ne", "Nb"):
            tag = "N-" + tag[2:]
            fixes += 1
        # Df, Dq and Du become D-
        if tag[:2] in ("Df", "Dq", "Du"):
            tag = "D-" + tag[2:]
            fixes += 1
    if simplify:
        # tag[:1] rather than tag[0] so an empty tag cannot raise.
        tag = tag[:1] + "-" + tag[2:]
    return tag, fixes


def rewrite_lines(lines, out, eventueel_later=False, simplify=False):
    """Rewrite lexicon lines and print the result to ``out``.

    Each usable input line has exactly four whitespace-separated fields
    (word, lemma, tag, frequency); lines with any other field count are
    ignored.  Lines whose lemma field is "NOPE" are counted and skipped.
    For every remaining line "word lemma rewritten_tag" is printed; the
    frequency is not written.

    Args:
        lines: iterable of text lines.
        out: writable text stream receiving the rewritten lines.
        eventueel_later: passed through to ``rewrite_tag``.
        simplify: passed through to ``rewrite_tag``.

    Returns:
        A ``(nope_cnt, later_cnt)`` tuple: skipped NOPE lines and the
        number of ``eventueel_later`` substitutions.
    """
    nope_cnt = 0
    later_cnt = 0
    for line in lines:
        bits = line.split()
        # Example line: Ῥωμαῖοι Ῥωμαῖος A--p---mnp-i 2
        if len(bits) != 4:
            continue
        word, lemma, tag, _freq = bits
        if lemma == "NOPE":  # no lemma was available here
            nope_cnt += 1
            continue
        new_tag, fixes = rewrite_tag(tag, eventueel_later, simplify)
        later_cnt += fixes
        print(word, lemma, new_tag, file=out)
    return nope_cnt, later_cnt


def main(argv=None):
    """Command-line entry point.

    Options: ``-f FILE`` (required) input lexicon, ``-e`` enable the
    Ne/Nb and Df/Dq/Du rewrites, ``-s`` replace the second tag character
    with "-".  Rewritten lines go to stdout; diagnostics and the two
    summary counters go to stderr so they never end up in redirected
    output.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Raises:
        SystemExit: with status 1 on an option error or a missing -f.
    """
    if argv is None:
        argv = sys.argv[1:]

    afile = None
    eventueel_later = False
    simplify = False  # changes Nb into N-, etc.

    try:
        opts, _args = getopt.getopt(argv, "ef:s", [])
    except getopt.GetoptError as err:
        sys.stderr.write("{0}\n".format(err))
        sys.exit(1)

    # getopt has already rejected unknown options, so no else branch.
    for opt, value in opts:
        if opt == "-f":
            afile = value
        elif opt == "-e":
            eventueel_later = True
        elif opt == "-s":
            simplify = True

    if afile is None:
        sys.stderr.write(
            "usage: python3 rewrite_proiel_v2.py -f FILE [-e] [-s]\n"
        )
        sys.exit(1)

    # The lexicon contains polytonic Greek; do not rely on the locale.
    with open(afile, "r", encoding="utf-8") as infile:
        nope_cnt, later_cnt = rewrite_lines(
            infile, sys.stdout, eventueel_later, simplify
        )

    sys.stderr.write("Skipped {0} NOPEs\n".format(nope_cnt))
    sys.stderr.write("Did {0} 'eventuele' fixes\n".format(later_cnt))


if __name__ == "__main__":
    main()