-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmakeGrammerLabelsFromCsv.py
45 lines (39 loc) · 2.1 KB
/
makeGrammerLabelsFromCsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Label 1: the utterance is marked as correct language.
# Label 0: the utterance is marked as incorrect language.
import sys
import subprocess
# Separator used to split each input line into fields.
# NOTE(review): this is a single space in the copy of the file under review;
# confirm it was not a tab character in the original source.
FIELD_SEPARATOR = " "
# Zero-based positions of the fields this script reads from each line.
ID_COLUMN = 2
LANGUAGE_COLUMN = 5
# The padded id segment is widened to this many characters.
ID_NUMBER_WIDTH = 3
OUTPUT_PATH = "grammer_label.csv"


def normalize_recording_id(raw_id):
    """Zero-pad the second dash-separated segment of the id prefix.

    The prefix is everything before the first '_'. When that prefix contains
    a '-', the segment following the first '-' is left-padded with zeros to
    three characters if it is one or two characters long, e.g.
    'spk-7_utt' -> 'spk-007_utt'.

    Ids without '_', prefixes without '-', and segments that are empty or
    already three or more characters long are returned unchanged.
    """
    if '_' not in raw_id:
        return raw_id
    prefix, _, remainder = raw_id.partition('_')
    segments = prefix.split('-')
    if len(segments) < 2:
        # Nothing to pad; the previous implementation raised IndexError here.
        return raw_id
    if 0 < len(segments[1]) < ID_NUMBER_WIDTH:
        # Pad the segment in place rather than via str.replace on the whole
        # id, which would rewrite the first matching substring even when it
        # occurs earlier in the id (e.g. 's1-1_a').
        segments[1] = segments[1].rjust(ID_NUMBER_WIDTH, '0')
    return '-'.join(segments) + '_' + remainder


def label_for_language(language):
    """Map a lower-cased language field to 1 (correct), 0 (incorrect) or -1.

    'incorrect' is tested first because it contains 'correct' as a substring.
    -1 is returned when neither word is present.
    """
    if 'incorrect' in language:
        return 0
    if 'correct' in language:
        return 1
    return -1


def parse_label_line(line):
    """Extract ``(recording_id, label)`` from one data line.

    The id is taken from field ID_COLUMN with its first and last character
    dropped (presumably surrounding quotes -- confirm against the input
    format) and everything from '.wav' onwards removed. The label is derived
    from field LANGUAGE_COLUMN after lower-casing and removing '"'.

    Raises:
        ValueError: if the line has fewer fields than LANGUAGE_COLUMN + 1.
    """
    fields = line.split(FIELD_SEPARATOR)
    if len(fields) <= LANGUAGE_COLUMN:
        raise ValueError(
            'expected at least {0} fields, found {1}'.format(
                LANGUAGE_COLUMN + 1, len(fields)))
    raw_id = fields[ID_COLUMN][1:-1].split('.wav')[0]
    language = fields[LANGUAGE_COLUMN].lower().replace('"', '')
    return normalize_recording_id(raw_id), label_for_language(language)


def collect_label_entries(paths):
    """Read every file in *paths* and return the 'id,label\\n' output lines.

    The first line of each file is skipped as a header and blank lines are
    ignored. A message is printed for every line labelled 0 or 1.

    Raises:
        ValueError: if a data line has too few fields; the message names the
            file and the 1-based line number.
    """
    entries = []
    for path in paths:
        with open(path) as input_file:
            for line_number, line in enumerate(input_file, 1):
                if line_number == 1 or not line.strip():
                    continue
                try:
                    recording_id, label = parse_label_line(line)
                except ValueError as err:
                    raise ValueError(
                        '{0}:{1}: {2}'.format(path, line_number, err))
                if label != -1:
                    print(recording_id + ' is labeld as ' + str(label))
                entries.append(recording_id + ',' + str(label) + '\n')
    return entries


def main(argv=None):
    """Label every input file named on the command line and write the CSV.

    *argv* defaults to ``sys.argv``; its first element (the program name) is
    ignored. The output file is written even when no input files are given.
    """
    if argv is None:
        argv = sys.argv
    entries = collect_label_entries(argv[1:])
    with open(OUTPUT_PATH, 'w') as output_file:
        output_file.writelines(entries)
    print('saved output to: ' + OUTPUT_PATH)


if __name__ == "__main__":
    main()