# (C) William W. Cohen and Carnegie Mellon University, 2016
#
# support for running experiments
#

import sys
import time
import logging
import collections
import traceback

# several of these imports look unused here, but they are kept so that
# learner specifications given on the command line (which are eval'd in
# the main below) can reference modules like learn and plearn
from tensorlog import comline
from tensorlog import config
from tensorlog import dataset
from tensorlog import declare
from tensorlog import learn
from tensorlog import matrixdb
from tensorlog import mutil
from tensorlog import plearn

def fulltype(o):
  return o.__module__ + "." + o.__class__.__name__
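
# For illustration: with the imports above, fulltype(learn.FixedRateGDLearner(prog))
# would return the string 'tensorlog.learn.FixedRateGDLearner'.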

class Expt(object):

  def __init__(self,configDict):
    self.config = configDict

  def run(self):
    print('Expt configuration:',self.config)
    return self._run(**self.config)

  def _run(self,
           prog=None, trainData=None, testData=None, targetMode=None,
           savedTestPredictions=None, savedTestExamples=None, savedTrainExamples=None, savedModel=None,
           learner=None):
    """Run an experiment.  The stages are:

    - if targetMode is specified, extract just the examples for that mode
      from trainData and testData
    - evaluate the untrained program on the train and test data and print
      the results
    - train on the trainData
    - if savedModel is given, write the learned database, including the
      trained parameters, to that directory
    - if savedTestPredictions is given, write the test-data predictions in
      ProPPR format
    - if savedTestExamples (savedTrainExamples) is given, save the test
      (train) examples in ProPPR format
    """
    if targetMode:
      targetMode = declare.asMode(targetMode)
      trainData = trainData.extractMode(targetMode)
      if testData is not None:
        testData = testData.extractMode(targetMode)
    if not learner: learner = learn.FixedRateGDLearner(prog)

    TP0 = Expt.timeAction(
        'running untrained theory on train data',
        lambda: learner.datasetPredict(trainData))
    Expt.printStats('untrained theory','train',trainData,TP0)
    if testData is not None:
      UP0 = Expt.timeAction(
          'running untrained theory on test data',
          lambda: learner.datasetPredict(testData))
      Expt.printStats('untrained theory','test',testData,UP0)

    Expt.timeAction('training %s' % fulltype(learner), lambda: learner.train(trainData))

    TP1 = Expt.timeAction(
        'running trained theory on train data',
        lambda: learner.datasetPredict(trainData))
    if testData is not None:
      UP1 = Expt.timeAction(
          'running trained theory on test data',
          lambda: learner.datasetPredict(testData))
    Expt.printStats('..trained theory','train',trainData,TP1)
    if testData is not None:
      testAcc,testXent = Expt.printStats('..trained theory','test',testData,UP1)
    else:
      testAcc,testXent = None,None

    if savedModel:
      Expt.timeAction('saving trained model', lambda: prog.db.serialize(savedModel))

    if savedTestPredictions and testData is not None:
      #todo move this logic to a dataset subroutine
      open(savedTestPredictions,"w").close() # wipe the file first
      def doit():
        qid = 0
        for mode in testData.modesToLearn():
          qid += Expt.predictionAsProPPRSolutions(
              savedTestPredictions,mode.functor,prog.db,
              UP1.getX(mode),UP1.getY(mode),True,qid)
      Expt.timeAction('saving test predictions', doit)

    if savedTestExamples and testData is not None:
      Expt.timeAction('saving test examples',
                      lambda: testData.saveProPPRExamples(savedTestExamples,prog.db))
    if savedTrainExamples:
      Expt.timeAction('saving train examples',
                      lambda: trainData.saveProPPRExamples(savedTrainExamples,prog.db))
    if savedTestPredictions and savedTestExamples and testData is not None:
      print('ready for commands like: proppr eval %s %s --metric auc --defaultNeg'
            % (savedTestExamples,savedTestPredictions))

    return testAcc,testXent

  @staticmethod
  def predictionAsProPPRSolutions(fileName,theoryPred,db,X,P,append=False,start=0):
    """Print X and P in the ProPPR solutions.txt format."""
    fp = open(fileName,'a' if append else 'w')
    dx = db.matrixAsSymbolDict(X,typeName=db.schema.getDomain(theoryPred,2))
    dp = db.matrixAsSymbolDict(P,typeName=db.schema.getRange(theoryPred,2))
    n = max(dx.keys())
    for i in range(n+1):
      #assert i in dp, "keys dp: %s\nkeys dx: %s" % (dp.keys(),dx.keys())
      dix = dx[i]
      dip = dp[i] if i in dp else {}
      assert len(list(dix.keys()))==1, 'X %s row %d is not onehot: %r' % (theoryPred,i,dix)
      x = list(dix.keys())[0]
      fp.write('# proved %d\t%s(%s,X1).\t999 msec\n' % (i+1+start,theoryPred,x))
      # list the solutions for this query, highest-scoring first
      scoredPs = sorted(list(dip.items()), key=lambda pair:pair[1], reverse=True)
      for (r,(y,py)) in enumerate(scoredPs):
        fp.write('%d\t%.18f\t%s(%s,%s).\n' % (r+1,py,theoryPred,x,y))
    fp.close()
    # return the number of rows written, so the caller can use it to offset
    # the query ids of a subsequent batch
    return n+1
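
  # For reference, each query written above looks like the following in
  # ProPPR's solutions.txt format (the predicate name and scores here are
  # hypothetical):
  #
  #   # proved 1	predict(william,X1).	999 msec
  #   1	0.750000000000000000	predict(william,sister).
  #   2	0.250000000000000000	predict(william,brother).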

  @staticmethod
  def timeAction(msg, act):
    """Perform an action encoded as a callable function and return its result,
    printing the elapsed time to stdout."""
    print(msg,'...')
    start = time.time()
    result = act()
    print(msg,'... done in %.3f sec' % (time.time()-start))
    return result
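
  # Typical usage, as in _run above:
  #   TP0 = Expt.timeAction('running untrained theory on train data',
  #                         lambda: learner.datasetPredict(trainData))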

  @staticmethod
  def printStats(modelMsg,testSet,goldData,predictedData):
    """Print accuracy and cross-entropy for some named model on a named eval set."""
    acc = learn.Learner.datasetAccuracy(goldData,predictedData)
    xent = learn.Learner.datasetCrossEntropy(goldData,predictedData,perExample=True)
    print('eval',modelMsg,'on',testSet,': acc',acc,'xent/ex',xent)
    return (acc,xent)
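
# A minimal programmatic-usage sketch.  The file names are hypothetical, and
# it assumes comline's parse*Spec helpers for loading the database, program,
# and datasets:
#
#   db = comline.parseDBSpec('fam.db')
#   prog = comline.parseProgSpec('fam.ppr',db)
#   trainData = comline.parseDatasetSpec('fam-train.exam',db)
#   testData = comline.parseDatasetSpec('fam-test.exam',db)
#   acc,xent = Expt({'prog':prog, 'trainData':trainData,
#                    'testData':testData, 'savedModel':'trained.db'}).run()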

# a useful main

if __name__=="__main__":

  usageLines = [
      'expt-specific options, given after the argument +++:',
      '    --savedModel e       # where e is a filename',
      '    --learner f          # where f is the name of a learner class',
      '    --learnerOpts g      # where g is a string that "evals" to a python dict',
      '    --weightEpsilon eps  # parameter weights are multiplied by eps',
      '    --params p1/k1,..    # comma-separated list of functor/arity pairs',
  ]
  argSpec = ["learner=", "savedModel=", "learnerOpts=", "targetMode=",
             "savedTestPredictions=", "savedTestExamples=", "savedTrainExamples=",
             "params=", "weightEpsilon="]
  optdict,args = comline.parseCommandLine(
      sys.argv[1:],
      extraArgConsumer="expt", extraArgSpec=argSpec, extraArgUsage=usageLines)

  weightEpsilon = float(optdict.get('--weightEpsilon',1.0))
  print('weightEpsilon =',weightEpsilon)
  if '--params' in optdict:
    paramSpecs = optdict['--params'].split(",")
    for spec in paramSpecs:
      functor,arity = spec.split("/")
      optdict['db'].markAsParameter(functor,int(arity))
  optdict['prog'].setFeatureWeights(epsilon=weightEpsilon)
  optdict['prog'].setRuleWeights(epsilon=weightEpsilon)

  learner = None
  if 'learner' in optdict:
    learnerSpec = optdict['learner']
    try:
      optdict['learner'] = eval(optdict['learner'])
      # so darn hard to get the number of quotes right in Makefile/shell,
      # so keep eval'ing learnerOpts while it is still a string
      while type(optdict.get('learnerOpts','{}'))==type(""):
        optdict['learnerOpts'] = eval(optdict.get('learnerOpts','{}'))
      print("decoded learner spec to "+repr(optdict['learner'])+" args "+repr(optdict['learnerOpts']))
      learner = optdict['learner'](optdict['prog'], **optdict['learnerOpts'])
    except Exception as ex:
      print('exception evaluating learner specification "%s"' % learnerSpec)
      traceback.print_exc(file=sys.stdout)
      raise ex

  params = {'prog':optdict['prog'],
            'trainData':optdict['trainData'],
            'testData':optdict['testData'],
            'learner':learner,
            'savedModel':optdict.get('savedModel'),
            'targetMode':optdict.get('targetMode'),
            'savedTestPredictions':optdict.get('savedTestPredictions'),
            'savedTestExamples':optdict.get('savedTestExamples'),
            'savedTrainExamples':optdict.get('savedTrainExamples'),
            }
  Expt(params).run()
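
# An example invocation (the file names are hypothetical; the +++ separator
# introduces the expt-specific options listed in usageLines above, and the
# learnerOpts keys depend on the chosen learner's constructor):
#
#   python -m tensorlog.expt --db fam.db --prog fam.ppr \
#       --trainData train.exam --testData test.exam \
#       +++ --learner 'learn.FixedRateGDLearner' --learnerOpts '{"epochs":10}' \
#       --savedModel trained.db --savedTestPredictions test.solutions.txt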