-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
139 lines (106 loc) · 4.78 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
### SETUP ###
# Classpath, option A: the proPPR jar and all of its dependencies live
# together in a single directory.
#CP:=.:/home/krivard/lib/proPPR.jar
# Classpath, option B (active): running from a source checkout.
# NOTE(review): PROPPR is presumably defined by Makefile.in -- confirm; that
# file is not visible from here.
include Makefile.in
CP := .:$(PROPPR)/bin:$(PROPPR)/lib/*

# DATASET and EXTRACTIONS keep any non-empty value they already have (from
# Makefile.in, the environment or the command line); otherwise they fall back
# to the defaults below.
ifeq ($(DATASET),)
DATASET = kbp.dataset.2014-0.7
endif
ifeq ($(EXTRACTIONS),)
EXTRACTIONS = $(DATASET)/ldc.r51.extractions.txt
endif

# Stems of the training and testing file families.
TRAIN = kbp_train
TEST = kbp_test

# Colon-separated list handed to every tool's --programFiles option.
PROGRAM = $(DATASET)/kbp.crules:$(DATASET)/kbp.sparse
#:${DATASET}/kbp.cfacts/entityGivenAnchor_eid_name_p.cfacts:${DATASET}/kbp.cfacts/anchorGivenEntity_eid_name_p.cfacts

# JVM options used by every java invocation in this file.
JOPTS = -Xmx35G
### PARAMETERS ###
# Value passed to --prover.
PROVER = dpr:1e-5
# Value passed to --threads.
THREADS = 7
# Value passed to --maxT.
MAXT = 13
### TARGETS ###
# Search directories for prerequisites that are not found in the current
# directory, selected by file-name pattern.
#VPATH = ${DATASET}
vpath %.cfacts $(DATASET)/kbp.cfacts/
vpath %.queries $(DATASET)
#vpath %.txt ${DATASET}
vpath %.solutions.txt $(DATASET)/proppr-output
# Command-style goals: none of these names is a file produced by a recipe, so
# declare them phony to keep a stray file of the same name from masking them.
.PHONY: all train score prescore examples

# Default goal.
all: test

# With no prerequisites, .SECONDARY treats every target as secondary, so
# files built as steps of a rule chain are not deleted automatically.
.SECONDARY:

train: params.wts

# Alias goals, e.g. `make kbp_test.inference`.  A pattern rule with no recipe
# defines nothing in GNU make (it only cancels a matching implicit rule), so
# each alias carries an explicit empty recipe (`;`) to be buildable.
%.uinference: %.solutions.txt ;
%.inference: %.trained.solutions.txt ;

score: $(TEST).trained.solutions.ann.tsv $(TEST).solutions.ann.tsv $(TRAIN).trained.solutions.ann.tsv

prescore: $(TEST).unnorm.solutions.txt $(TRAIN).unnorm.solutions.txt

examples: $(TRAIN).examples $(TEST).examples
#${TRAIN}.examples ${TEST}.examples
# Build <stem>.examples by running solutions2train.pl with, in order: the
# stem's queries file under $(DATASET), the solutions file (first
# prerequisite), the side file <stem>.unreachable, and the target itself.
# NOTE(review): <stem>.unreachable is presumably an extra output of the
# script -- confirm; no rule declares it as a target.
%.examples: %.solutions.txt %.answerQuery_did_qid_eid.queries
	./solutions2train.pl $(DATASET)/$*.answerQuery_did_qid_eid.queries $< \
		$*.unreachable $@
#mv $@ ${DATASET}/
# Run ExampleCooker on an examples file: reads $< via --data and writes the
# target via --output, using the shared program files and prover.
# NOTE(review): in this copy the --graphKey argument had been garbled into the
# literal text "[email protected]" (an e-mail obfuscation artifact), which the
# shell would split into two meaningless arguments.  `$@.graphKey` is a
# reconstruction of the intended "<target>.<suffix>" form -- confirm the exact
# suffix against the upstream file.
%.cooked: %.examples
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.ExampleCooker --programFiles $(PROGRAM) \
		--data $< --output $@ --graphKey $@.graphKey --prover $(PROVER)
# Answer the stem's queries with QueryAnswerer, passing --unnormalized, and
# write the result to the target.
%.unnorm.solutions.txt: %.answerQuery_did_qid_eid.queries
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.QueryAnswerer \
		--programFiles $(PROGRAM) \
		--queries $< --output $@ \
		--prover $(PROVER) --unnormalized --maxT $(MAXT)
# Answer the stem's queries with QueryAnswerer (no trained parameters) and
# write the result to the target.
%.solutions.txt: %.answerQuery_did_qid_eid.queries
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.QueryAnswerer \
		--programFiles $(PROGRAM) \
		--queries $< --output $@ --prover $(PROVER) --maxT $(MAXT)
# Same QueryAnswerer invocation as for plain solutions, but driven by a
# <stem>.humanreadable.queries file.
%.humanreadable.txt: %.humanreadable.queries
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.QueryAnswerer \
		--programFiles $(PROGRAM) \
		--queries $< --output $@ \
		--prover $(PROVER) --maxT $(MAXT)
# Answer the stem's queries with QueryAnswerer using the trained parameter
# file params.wts (--params), with --reranked, --trainer mrr and --force.
%.trained.solutions.txt: %.answerQuery_did_qid_eid.queries params.wts
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.QueryAnswerer \
		--programFiles $(PROGRAM) \
		--queries $< --output $@ --prover $(PROVER) --maxT $(MAXT) \
		--params params.wts --reranked --trainer mrr --force
# Produce the parameter file params.wts.
# The conditional is evaluated when the makefile is parsed, not when the
# recipe runs: if $(DATASET)/$(TRAIN).examples.cooked does not exist at that
# moment, Experiment is run on the raw examples (with an empty placeholder
# file `dummy` as --test) and told to write the cooked file via --output;
# otherwise Trainer is run directly on the existing cooked file.
params.wts: $(TRAIN).examples
ifeq ($(wildcard $(DATASET)/$(TRAIN).examples.cooked),)
	touch dummy
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.Experiment \
		--programFiles $(PROGRAM) \
		--prover $(PROVER) --train $< --test dummy \
		--output $(DATASET)/$(TRAIN).examples.cooked \
		--params $@ --threads $(THREADS) --maxT $(MAXT)
else
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.Trainer \
		--prover $(PROVER) \
		--train $(DATASET)/$(TRAIN).examples.cooked \
		--params $@ --threads $(THREADS) --maxT $(MAXT)
endif
# Run Tester (--tester rt) on the training examples without a --params file.
# Declared phony: `pre` is a command, not a file this recipe creates.
.PHONY: pre
pre: $(TRAIN).examples
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.Tester --tester rt --programFiles $(PROGRAM) \
		--prover $(PROVER) --test $< --threads $(THREADS)
# Run Tester (--tester rt) on the test examples without a --params file.
# Declared phony: `pretest` is a command, not a file this recipe creates.
.PHONY: pretest
pretest: $(TEST).examples
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.Tester --tester rt --programFiles $(PROGRAM) \
		--prover $(PROVER) --test $< --threads $(THREADS)
# Run Tester (--tester rt) on the training examples with the parameter file
# params.wts.
# Declared phony: `post` is a command, not a file this recipe creates.
.PHONY: post
post: $(TRAIN).examples params.wts
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.Tester --tester rt --programFiles $(PROGRAM) \
		--prover $(PROVER) --test $< --params params.wts --threads $(THREADS)
# Run Tester (--tester rt) on the test examples with the parameter file
# params.wts, passing --force.
# Declared phony: `test` is a command, not a file this recipe creates.
# The space between `$<` and `--params` is required: without it the examples
# path and the option name fuse into one argument, so --test receives a wrong
# value and params.wts is left as a stray positional argument.
.PHONY: test
test: $(TEST).examples params.wts
	java $(JOPTS) -cp $(CP) edu.cmu.ml.praprolog.Tester --tester rt --programFiles $(PROGRAM) \
		--prover $(PROVER) --test $< --params params.wts --threads $(THREADS) --force
####### results processing:
# A literal tab character, used as the field separator for `join -t`.
# printf is used instead of `echo "\t"` because whether echo interprets
# backslash escapes depends on which shell /bin/sh is; printf always does.
TAB := $(shell printf '\t')
# Flatten a solutions file into tab-separated rows.
# Output columns: qid did rank score eid
# sed strips the answerQuery(...) / -1=c[...] wrappers and turns the first
# comma on each line into a tab; awk remembers fields 5 and 4 of each line
# containing '#' and prefixes them to every following line that starts with
# a digit.
%.solutions.tsv: %.solutions.txt
	sed 's/-1=c[[]//;s/]//;s/,-1[)]//;s/answerQuery[(]//;s/,/\t/' $< \
	| awk '/#/ {q = $$5 "\t" $$4;} /^[0-9]/ {print q "\t" $$0}' \
	> $@
# Merge the .proppr and .remaining parts into the final annotated file,
# ordered by column 1 and then numerically by column 3.
# Output columns: qid did rank score eid xtype ntype
%.solutions.ann.tsv: %.solutions.ann.tsv.proppr %.solutions.ann.tsv.remaining
	cat $^ | sort -k 1b,1 -k 3n,3 > $@
# Annotate each solution row with type information from two .cfacts files.
# Step 1 joins the rows (sorted on column 1) against column 2 of
# extractedType_qid_t.cfacts; step 2 re-sorts on column 5 and joins against
# column 2 of entityType_eid_t.cfacts.  `-a 1` keeps rows with no match.
# Rows entering awk look like:
#   eid qid did rank score "extractedType" xtype "entityType" ntype
# awk emits qid did rank score eid xtype ntype, defaulting xtype to "na" and
# ntype to "ukn"; the result is sorted and de-duplicated.
%.solutions.ann.tsv.proppr: %.solutions.tsv extractedType_qid_t.cfacts entityType_eid_t.cfacts
	sort -k 1b,1 $< \
	| join -a 1 -2 2 -t "$(TAB)" - $(word 2,$^) \
	| sort -k 5b,5 \
	| join -a 1 -1 5 -2 2 -t "$(TAB)" - $(word 3,$^) \
	| awk 'BEGIN{FS=OFS="\t"}{\
	xt = "na"; nt = "ukn"; if ($$6 == "extractedType") { xt = $$7; } else { nt = $$7; }\
	print $$2,$$3,$$4,$$5,$$1,xt,nt}' \
	| sort -k 1b,1 -k 3n,3 \
	| uniq > $@
# Emit a placeholder row for every line of $(EXTRACTIONS) whose first field
# has no (case-insensitive) match among the column-1 values of the .proppr
# file: `join -v 2` prints only the unpairable lines of the second input.
# Each placeholder is: lower-cased field 1, field 2, then the constants
# 1, 1e-15, nil, per, per.
%.solutions.ann.tsv.remaining: %.solutions.ann.tsv.proppr $(EXTRACTIONS)
	cut -f 1 $< \
	| uniq \
	| join -t "$(TAB)" -iv 2 - $(EXTRACTIONS) \
	| awk 'BEGIN{FS=OFS="\t"}{print tolower($$1),$$2,"1","1e-15","nil","per","per"}' \
	| uniq > $@
# Keep solutions files, examples files and params.wts even if make is
# interrupted while building them or they were built only as chain steps.
.PRECIOUS: %.solutions.txt %.examples params.wts