-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetters_parameters.py
422 lines (351 loc) · 18.5 KB
/
getters_parameters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
import spacy
from spacy.tokens import Doc, Token
# Large English pipeline: the getters below rely on word vectors
# (Token.similarity / has_vector), which the small models do not ship.
nlp = spacy.load("en_core_web_lg")
from typing import Optional
import re
import importlib
import ogree_wiki as wiki
# Reload so edits to the wiki module are picked up without restarting
# an interactive session.
importlib.reload(wiki)
# NOTE(review): presumably used by other modules to look up the getter
# functions of this file by name — confirm at the call sites.
FUNCTIONS = globals()
def convertToUnit(processed_entry : "Doc", index : int, value, finalUnit : str) :
    """Convert the numeric `value` found at token `index` into `finalUnit`.

    The source unit is detected either glued to the number token itself
    (e.g. "5cm") or carried by the following token (e.g. "5 cm").  When no
    source unit is detected, or `finalUnit` is unknown, the raw float value
    is returned unchanged.
    """
    # Meters per unit: value_in_meters = value * FACTORS[unit].
    # "t" is a 0.6 m floor tile (the original table mixed conventions:
    # metric entries were meters-per-unit while "t" was units-per-meter,
    # so metric conversions came out inverted).
    FACTORS = {
        'km': 1000,
        'm': 1,
        'dm': 0.1,
        'cm': 0.01,
        'mm': 0.001,
        "t" : 0.6
    }
    currentUnit = None
    # Longest unit names first so "m" cannot shadow "mm"/"cm"/"dm"/"km".
    for unit in sorted(FACTORS.keys(), key=len, reverse=True) :
        if re.findall(f"{re.escape(str(value))}[ ]*{unit}", processed_entry[index].text) :
            currentUnit = unit
            break
    if not currentUnit and index+1 < len(processed_entry) :
        # The unit may be a separate token right after the number.
        nextText = processed_entry[index+1].text.strip()
        if nextText in FACTORS :
            currentUnit = nextText
    if currentUnit and finalUnit and finalUnit in FACTORS :
        # value -> meters -> finalUnit
        return float(value)*FACTORS[currentUnit]/FACTORS[finalUnit]
    return float(value)
def template(processed_entry : Doc, index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """Extract a template name around the key word at `index`.

    The first name token is looked for in the syntactic subtree of the key
    word, then in the tokens after it, finally in the tokens before it.
    Tokens joined by "-", "/" or "\\" are glued back together; other
    consecutive name tokens are joined with "-".
    Returns (name, token_indexes), or (None, token_indexes) when no name
    was found.
    """
    next_words = [token for token in processed_entry[index+1:nextKeyWordIndex] if token.i not in forbiddenIndexes]
    previous_words = [token for token in processed_entry[lastKeyWordIndex+1:index] if token.i not in forbiddenIndexes]
    def isTemplate(token : Token) -> bool :
        # A token can belong to a template name if it is out-of-vocabulary,
        # or is a noun / proper noun / punctuation / other (plain commas excluded).
        if (not token.has_vector
            or (token.pos_ in ["NOUN", "PROPN", "PUNCT", "X"] and token.text != ",")) :
            return True
        return False
    # find the first token of the template name
    def findNameFirst(processed_entry : Doc, index : int) :
        name = ""
        indexes = []
        if list(processed_entry[index].children) : # we first seek in the children
            for token in processed_entry[index].subtree :
                tokenRealIndex = list(processed_entry).index(token)
                if tokenRealIndex in forbiddenIndexes : continue
                if isTemplate(token) :
                    name, indexes = findFullName(processed_entry, tokenRealIndex)
                    break
        if not name : # if none found, seek in the next words
            for token in next_words :
                tokenRealIndex = list(processed_entry).index(token)
                if isTemplate(token) :
                    name, indexes = findFullName(processed_entry, tokenRealIndex)
                    break
        if not name : # finally in the previous
            for token in previous_words :
                tokenRealIndex = list(processed_entry).index(token)
                if isTemplate(token) :
                    name, indexes = findFullName(processed_entry, tokenRealIndex)
                    break
        return name, indexes
    # find the full name of the template, from the start token
    def findFullName(processed_entry : Doc, index : int) :
        name = ""
        indexes = []
        currentIndex = index
        isNameFinished = False
        while not isNameFinished :
            validIndex = currentIndex not in forbiddenIndexes
            indexJump = 1
            # if there is a dash, we take both last and next word (and the dash)
            if (validIndex
                and processed_entry[currentIndex].lower_ in ["-","/","\\"]
                and currentIndex +1 < nextKeyWordIndex
                and currentIndex -1 > lastKeyWordIndex) :
                if currentIndex+1 in forbiddenIndexes or currentIndex-1 in forbiddenIndexes :
                    # FIX: the original `continue` here never advanced
                    # currentIndex, looping forever when a separator's
                    # neighbour was forbidden; skip past the separator.
                    if currentIndex + 1 > len(processed_entry)-1 :
                        isNameFinished = True
                    else :
                        currentIndex += 1
                    continue
                if not currentIndex-1 in indexes :
                    name += processed_entry[currentIndex-1].text
                    indexes.append(currentIndex-1)
                name += processed_entry[currentIndex].text + processed_entry[currentIndex+1].text
                indexes.extend([currentIndex, currentIndex+1])
                indexJump = 2
            elif validIndex and (isTemplate(processed_entry[currentIndex]) or processed_entry[currentIndex].pos_ == "NUM") :
                name += "-" + processed_entry[currentIndex].text
                indexes.append(currentIndex)
            else :
                isNameFinished = True
            if currentIndex + indexJump > len(processed_entry)-1 :
                isNameFinished = True
            else :
                currentIndex += indexJump
        # trim joining dashes; guarded so an empty name cannot raise IndexError
        if name and name[-1] == "-" : name = name[:-1]
        if name and name[0] == "-" : name = name[1:]
        return name, indexes
    # if the next token is a synonym of "called", start to seek from it
    if (index +1 <= len(processed_entry)-1
        and processed_entry[index+1].similarity(nlp("called")[0]) > 0.5
        and processed_entry[index].is_ancestor(processed_entry[index+1])) :
        resultValues, resultIndexes = findNameFirst(processed_entry, index+1)
    else :
        resultValues, resultIndexes = findNameFirst(processed_entry, index)
    if not resultValues :
        return None, resultIndexes
    return resultValues, resultIndexes
# TODO : manage width etc
def position(processed_entry : "Doc", index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """Extract the position coordinates attached to the keyword at `index`.

    Numbers are searched first in the tokens after the keyword, then in the
    tokens before it.  The accepted number of coordinates depends on
    `attachedEntity`.  Returns (values, token_indexes) or (None, []).
    """
    next_words = [token for token in processed_entry[index+1:nextKeyWordIndex] if token.i not in forbiddenIndexes]
    previous_words = [token for token in processed_entry[lastKeyWordIndex+1:index] if token.i not in forbiddenIndexes]
    # accepted coordinate counts per entity kind
    LENGTH_CRITERIA = [2]
    if attachedEntity == "device" : LENGTH_CRITERIA = [1]
    if attachedEntity in ["rack", "corridor"] : LENGTH_CRITERIA.append(3)
    # leading (possibly negative, possibly decimal) number; raw string fixes
    # the invalid "\d" escape of the original
    NUMBER_PATTERN = r"^-*\d+\.*\d*"
    positionList = []
    for token in next_words :
        foundValue = re.findall(NUMBER_PATTERN, token.text)
        if foundValue :
            positionList.append((foundValue[0], token.i))
    if len(positionList) not in LENGTH_CRITERIA : # if none found in next words
        positionList = []
        for token in previous_words :
            foundValue = re.findall(NUMBER_PATTERN, token.text)
            if foundValue :
                positionList.append((foundValue[0], token.i))
    if len(positionList) not in LENGTH_CRITERIA :
        return None, []
    if attachedEntity and attachedEntity in wiki.DEFAULT_UNITS.keys() and "position" in wiki.DEFAULT_UNITS[attachedEntity].keys() :
        unitList = wiki.DEFAULT_UNITS[attachedEntity]["position"]
        # `axis` renamed: the original comprehension shadowed the `index` parameter
        resultValues = [convertToUnit(processed_entry, tokenIndex, value, unitList[axis]) for axis, (value, tokenIndex) in enumerate(positionList)]
    else :
        resultValues = [float(value) for (value, _) in positionList]
    if attachedEntity == "device" :
        # a device position is a single integer value
        resultValues = int(resultValues[0])
    resultIndexes = [x[1] for x in positionList]
    return resultValues, resultIndexes
def rotation(processed_entry : "Doc", index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """Extract the rotation attached to the keyword at `index`.

    For racks and corridors an orientation keyword ("front", "left", ...)
    is first translated into its Euler angles; otherwise three numeric
    angles (racks/corridors) or a single one (other entities) are parsed.
    "counter ... clockwise" in the surrounding words flips the sign.
    Returns (values, token_indexes) or (None, []).
    """
    next_words = [token for token in processed_entry[index+1:nextKeyWordIndex] if token.i not in forbiddenIndexes]
    previous_words = [token for token in processed_entry[lastKeyWordIndex+1:index] if token.i not in forbiddenIndexes]
    rotationKeyWordsDict = {"front": [0, 0, 180],
                            "rear": [0, 0, 0],
                            "left": [0, 90, 0],
                            "right": [0, -90, 0],
                            "top": [90, 0, 0],
                            "bottom": [-90, 0, 0]
                            }
    if attachedEntity in ["rack", "corridor"] :
        # seek key words in the dict above
        rotationKeyWordsList = list(rotationKeyWordsDict.keys())
        for token in next_words :
            if token.text in rotationKeyWordsList and processed_entry[index].is_ancestor(token) :
                # FIX: return a (values, indexes) pair like every other getter
                return rotationKeyWordsDict[token.text], [token.i]
        for token in previous_words :
            if token.text in rotationKeyWordsList and processed_entry[index].is_ancestor(token) :
                return rotationKeyWordsDict[token.text], [token.i]
    LENGTH_CRITERIA = 3 if attachedEntity in ["rack", "corridor"] else 1
    NUMBER_PATTERN = r"^-*\d+\.*\d*"
    rotationList = []
    for token in next_words :
        foundValue = re.findall(NUMBER_PATTERN, token.text)
        if foundValue :
            rotationList.append((foundValue[0], token.i))
    if len(rotationList) != LENGTH_CRITERIA : # if not found in next words, look before
        rotationList = []
        for token in previous_words :
            foundValue = re.findall(NUMBER_PATTERN, token.text)
            if foundValue :
                rotationList.append((foundValue[0], token.i))
    if len(rotationList) != LENGTH_CRITERIA :
        return None, []
    resultValues = [float(x[0]) for x in rotationList]
    resultIndexes = [x[1] for x in rotationList]
    # FIX: the negation was applied to the (text, index) tuples of
    # rotationList (a TypeError at runtime) instead of the parsed values;
    # previous words are lowercased for consistency with next words.
    isRotationNegative = re.search("counter.*clockwise", "".join([token.lower_ for token in next_words]+[token.lower_ for token in previous_words]))
    if isRotationNegative :
        resultValues = [-value for value in resultValues]
    if not attachedEntity in ["rack", "corridor"] :
        resultValues = resultValues[0]
    return resultValues, resultIndexes
def size(processed_entry : "Doc", index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """Extract the size values attached to the keyword at `index`.

    Numbers are searched first in the tokens after the keyword, then in the
    tokens before it.  The expected number of values depends on
    `attachedEntity`.  Returns (values, token_indexes) or (None, []).
    """
    next_words = [token for token in processed_entry[index+1:nextKeyWordIndex] if token.i not in forbiddenIndexes]
    previous_words = [token for token in processed_entry[lastKeyWordIndex+1:index] if token.i not in forbiddenIndexes]
    # expected number of size values per entity kind
    LENGTH_CRITERIA = 3
    if attachedEntity == "pillar" :
        LENGTH_CRITERIA = 2
    if attachedEntity == "device" :
        LENGTH_CRITERIA = 1
    # raw string fixes the invalid "\d" escape of the original
    NUMBER_PATTERN = r"^-*\d+\.*\d*"
    sizeList = []
    for token in next_words :
        foundValue = re.findall(NUMBER_PATTERN, token.text)
        if foundValue :
            sizeList.append((foundValue[0], token.i))
    if len(sizeList) != LENGTH_CRITERIA : # if none found in next words
        sizeList = []
        for token in previous_words :
            foundValue = re.findall(NUMBER_PATTERN, token.text)
            if foundValue :
                sizeList.append((foundValue[0], token.i))
    if len(sizeList) != LENGTH_CRITERIA :
        return None, []
    if attachedEntity and attachedEntity in wiki.DEFAULT_UNITS.keys() and "size" in wiki.DEFAULT_UNITS[attachedEntity].keys() :
        unitList = wiki.DEFAULT_UNITS[attachedEntity]["size"]
        # `axis` renamed: the original comprehension shadowed the `index` parameter
        resultValues = [convertToUnit(processed_entry, tokenIndex, value, unitList[axis]) for axis, (value, tokenIndex) in enumerate(sizeList)]
    else :
        resultValues = [float(value) for (value, _) in sizeList]
    resultIndexes = [x[1] for x in sizeList]
    if attachedEntity == "device" :
        # a device size is a single value
        resultValues = resultValues[0]
    return resultValues, resultIndexes
def axisOrientation(processed_entry : "Doc", index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """Find an axis orientation: any combination of [+/-]x[+/-]y, e.g. "+x+y" or "-x+y".

    The next words are searched first, then the previous words.  A missing
    axis defaults to its positive direction.
    Returns (orientation, token_indexes) or (None, token_indexes).
    """
    next_words = [token for token in processed_entry[index+1:nextKeyWordIndex] if token.i not in forbiddenIndexes]
    previous_words = [token for token in processed_entry[lastKeyWordIndex+1:index] if token.i not in forbiddenIndexes]
    resultIndexes = []
    # raw strings: the original "[-\+]?" patterns contained invalid escape
    # sequences (inside a character class "+" needs no escaping)
    X_PATTERN = r"[-+]?[ ]?x"
    Y_PATTERN = r"[-+]?[ ]?y"
    #An axis Orientation can be any combinason of [+/-]x[+/-]y. eg: +x+y or -x+y
    axisX = re.findall(X_PATTERN, "".join([token.lower_ for token in next_words]))
    axisY = re.findall(Y_PATTERN, "".join([token.lower_ for token in next_words]))
    if len(axisX) in [0,1] and len(axisY) in [0,1] and len(axisX)+len(axisY) != 0 :
        # get the indexes
        for token in next_words :
            if token.lower_ in "".join(axisX + axisY) :
                resultIndexes.append(token.i)
    # if not found in the next words, seek in the previous words
    if len(axisX) not in [0,1] or len(axisY) not in [0,1] or len(axisX)+len(axisY) == 0 :
        resultIndexes = []
        axisX = re.findall(X_PATTERN, "".join([token.lower_ for token in previous_words]))
        axisY = re.findall(Y_PATTERN, "".join([token.lower_ for token in previous_words]))
        if len(axisX) in [0,1] and len(axisY) in [0,1] and len(axisX)+len(axisY) != 0 :
            for token in previous_words :
                if token.lower_ in "".join(axisX + axisY) :
                    resultIndexes.append(token.i)
    if len(axisX) not in [0,1] or len(axisY) not in [0,1] or len(axisX)+len(axisY) == 0 :
        return None, resultIndexes
    # if the value is not comprehensive, default the missing axis to "+"
    resultValues = ""
    if len(axisX) == 0 :
        resultValues = "+x" + axisY[0].replace(" ","")
    elif len(axisY) == 0 :
        resultValues = axisX[0].replace(" ","") + "+y"
    else :
        resultValues = axisX[0].replace(" ","") + axisY[0].replace(" ","")
    return resultValues, resultIndexes
# TODO : to change ?
def unit(processed_entry : Doc, index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """Find the floor unit mentioned between the two key words.

    Recognises both the full word (via its lemma, e.g. "meters" -> "m")
    and the single-letter form ("m", "t", "f").
    Returns (unit, token_indexes), or (None, token_indexes) unless exactly
    one candidate was found.
    """
    DICT_UNIT = {
        "meter" : "m",
        "tile" : "t",
        "foot" : "f"
    }
    resultValues = []
    resultIndexes = []
    for token in processed_entry[lastKeyWordIndex+1:nextKeyWordIndex] :
        if token.i in forbiddenIndexes :
            continue
        if token.lemma_ in DICT_UNIT : # if whole word
            resultValues.append(DICT_UNIT[token.lemma_])
            resultIndexes.append(token.i)
        if token.lower_ in DICT_UNIT.values() : # if only a letter
            resultValues.append(token.lower_)
            resultIndexes.append(token.i)
    if len(resultValues) == 1 :
        return resultValues[0], resultIndexes
    return None, resultIndexes
def findKeyWord(processed_entry : Doc,
                index : int,
                attachedEntity : str,
                lastKeyWordIndex : int,
                nextKeyWordIndex : int,
                forbiddenIndexes : list = [],
                keyWordsList : list = []) :
    """Return the first token between the key words whose lowercase text
    belongs to `keyWordsList`.

    Returns (keyword, [token_index]), or (None, []) when nothing matched.
    """
    for token in processed_entry[lastKeyWordIndex+1:nextKeyWordIndex] :
        if token.i in forbiddenIndexes :
            continue
        if token.lower_ in keyWordsList :
            # first hit wins
            return token.lower_, [token.i]
    return None, []
def color(processed_entry : Doc, index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """Find a color, either as a basic color keyword or as a "#RRGGBB" hex code.

    Keywords are resolved through wiki.COLORS_HEX_BASIC; otherwise a hex
    code is searched in the next words, then in the previous words.
    Returns (hex_value, token_indexes) or (None, token_indexes).
    """
    # We first seek a keyword, if not found we directly seek a hexa code
    colorKeyWords = wiki.COLORS_HEX_BASIC.keys()
    if processed_entry[index].lower_ in colorKeyWords :
        return wiki.COLORS_HEX_BASIC[processed_entry[index].lower_], [index]
    else :
        value, indexes = findKeyWord(processed_entry, index, attachedEntity, lastKeyWordIndex, nextKeyWordIndex, forbiddenIndexes, colorKeyWords)
        if value is not None :
            # FIX: report the indexes of the matched keyword (`indexes`),
            # not the entry keyword's index
            return wiki.COLORS_HEX_BASIC[value], indexes
    next_words = [token for token in processed_entry[index+1:nextKeyWordIndex] if token.i not in forbiddenIndexes]
    previous_words = [token for token in processed_entry[lastKeyWordIndex+1:index] if token.i not in forbiddenIndexes]
    resultIndexes = []
    resultValues = re.findall("#[ ]*[a-zA-Z0-9]{6}", "".join([token.text for token in next_words]))
    if len(resultValues) == 1 :
        for token in next_words :
            if token.text in resultValues[0] :
                resultIndexes.append(token.i)
    if len(resultValues) != 1 : # if none found, seek in the previous words
        resultValues = re.findall("#[ ]*[a-zA-Z0-9]{6}", "".join([token.text for token in previous_words]))
        if len(resultValues) == 1 :
            for token in previous_words :
                if token.text in resultValues[0] :
                    resultIndexes.append(token.i)
    if len(resultValues) != 1 :
        return None, resultIndexes
    else :
        return resultValues[0].replace(" ", ""), resultIndexes
def slot(processed_entry : Doc, index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """A slot name is parsed exactly like a template name; delegate to `template`."""
    return template(processed_entry, index, attachedEntity,
                    lastKeyWordIndex, nextKeyWordIndex, forbiddenIndexes)
def side(processed_entry : Doc, index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """Find the side of an entity between the two key words."""
    KEY_WORDS = ["front", "rear", "frontflipped", "rearflipped"]
    return findKeyWord(processed_entry, index, attachedEntity,
                       lastKeyWordIndex, nextKeyWordIndex, forbiddenIndexes, KEY_WORDS)
def temperature(processed_entry : Doc, index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """Find the temperature kind ("cold" or "warm") around the keyword at `index`."""
    KEY_WORDS = ["cold","warm"]
    current = processed_entry[index].lower_
    if current in KEY_WORDS :
        # the entry keyword itself carries the value
        return current, [index]
    return findKeyWord(processed_entry, index, attachedEntity,
                       lastKeyWordIndex, nextKeyWordIndex, forbiddenIndexes, KEY_WORDS)
def type(processed_entry : Doc, index : int, attachedEntity : str, lastKeyWordIndex : int, nextKeyWordIndex : int, forbiddenIndexes : list = []) :
    """Find the display type ("wireframe" or "plain") around the keyword at `index`.

    NOTE: shadows the builtin `type` at module level; kept because the
    getter name is part of this module's public lookup interface.
    """
    KEY_WORDS = ["wireframe","plain"]
    current = processed_entry[index].lower_
    if current in KEY_WORDS :
        # the entry keyword itself carries the value
        return current, [index]
    return findKeyWord(processed_entry, index, attachedEntity,
                       lastKeyWordIndex, nextKeyWordIndex, forbiddenIndexes, KEY_WORDS)
# def slot() :
# pass
# def attribute() :
# pass
# def font(processed_entry : type(nlp("")), index : int, lastKeyWordIndex : int, nextKeyWordIndex : int,entity="") -> Optional[list] :
# #This function find the font of a label.
# #We slit the sentence in two parts
# next_words = processed_entry[index+1:nextKeyWordIndex]
# last_words = processed_entry[lastKeyWordIndex:index]
# print("next_word : ", next_words)
# findList = re.findall(r'#[ *][A-Z0-9a-z]{6}\b'," ".join([str(token) for token in next_words]))
# findList += re.findall('italic|bold'," ".join([str(token) for token in next_words]))
# if len(findList) == 0:
# findList = re.findall(r'#[ *][A-Z0-9a-z]{6}\b'," ".join([str(token) for token in last_words]))
# findList += re.findall('italic | bold'," ".join([str(token) for token in last_words]))
# if len(findList) == 0:
# raise Exception("There wasn't any argument")
# #We return the list of the argument associate to the font
# return findList