-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathharkvisualizer.py
220 lines (183 loc) · 7.27 KB
/
harkvisualizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import atexit
from datetime import timedelta
import logging as log
import os
from random import choice
import simplejson as json
from string import ascii_uppercase
from concurrent.futures import ProcessPoolExecutor
import pyhark.saas
import speech_recognition
import tornado.ioloop
import tornado.web
import tornado.websocket
import tornado.gen
from werkzeug.utils import secure_filename
# Directory used for staged uploads and separated audio. The trailing slash
# matters: some paths are built by appending a file name straight onto it.
STAGING_AREA = '/tmp/'
# Folder names, resolved relative to this module when building `settings`.
STATIC_PATH = 'static'
HTML_TEMPLATE_PATH = 'templates'
# TCP port the web application listens on.
LISTEN_PORT = 80
# Language tag passed to the speech recognizer.
LANGUAGE = 'ja-JP'

_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
log.basicConfig(level=log.INFO, format=_LOG_FORMAT)

# Keyword arguments handed to tornado.web.Application.
_MODULE_DIR = os.path.dirname(__file__)
settings = dict(
    static_path=os.path.join(_MODULE_DIR, STATIC_PATH),
    template_path=os.path.join(_MODULE_DIR, HTML_TEMPLATE_PATH),
)

# Session configuration sent to HARK when a new session is created.
default_hark_config = dict(
    processType='batch',
    params=dict(
        numSounds=2,
        roomName='sample room',
        micName='dome',
        thresh=21,
    ),
    sources=[
        {'from': 0, 'to': 180},
        {'from': -180, 'to': 0},
    ],
)
class HttpRequestHandler(tornado.web.RequestHandler):
    """Serve the upload page and accept audio uploads for HARK processing."""

    def get(self):
        """Render the upload form (``index.html``)."""
        # No @tornado.web.asynchronous: render() finishes the request itself,
        # and that decorator no longer exists in Tornado 6.
        self.render('index.html')

    @tornado.gen.coroutine
    def post(self):
        """Start a HARK session, upload the posted file, render the results page.

        Expects a multipart upload under the form field ``file``. The upload
        itself runs in a worker process via ``async_upload``.

        Raises:
            tornado.web.HTTPError: 400 when no ``file`` part was posted.
        """
        uploads = self.request.files.get('file')
        if not uploads:
            # Answer with a client error instead of a KeyError-driven 500.
            raise tornado.web.HTTPError(400, 'missing "file" upload')
        upload = uploads[0]
        hark.client.login()
        hark.client.createSession(default_hark_config)
        log.info("Uploading asynchrounously")
        pool = ProcessPoolExecutor(max_workers=2)
        try:
            yield pool.submit(async_upload, upload)
        finally:
            # Release the worker processes even when the upload raised.
            pool.shutdown()
        log.info("Rendering visualization page")
        self.render('visualize.html')
# To multiprocess this, it cannot be an instance method
def async_upload(file):
    """Stage an uploaded file on disk and send it to HARK.

    Args:
        file: upload mapping providing ``'filename'`` and ``'body'`` (the raw
            file content).
    """
    file_name = secure_filename(file['filename'])
    # Best effort to ensure same file name is unique per post
    random_string = ''.join(choice(ascii_uppercase) for _ in range(10))
    audio_file = os.path.join(
        STAGING_AREA, '{0}_{1}'.format(random_string, file_name))
    # Binary mode: the body is raw audio. Closing the handle before re-reading
    # guarantees the whole body is flushed to disk before it is uploaded.
    with open(audio_file, 'wb') as write_handle:
        write_handle.write(file['body'])
    with open(audio_file, 'rb') as read_handle:
        hark.upload_file(read_handle)
    log.info("Asynchronous upload complete")
# Wrapper around some PyHarkSaas methods
class Hark:
    """Logged-in PyHarkSaaS client plus the few operations this app needs."""

    def __init__(self):
        """Read credentials from ``harkauth.json`` and log the client in."""
        log.info('Initializing hark client')
        # Context manager so the credentials file is closed right away.
        with open('harkauth.json') as auth_file:
            auth = json.load(auth_file)
        client = pyhark.saas.PyHarkSaaS(auth['apikey'], auth['apisec'])
        client.login()
        self.client = client

    def get_audio(self, srcID, file_name):
        """Download the separated audio for ``srcID`` into ``file_name``."""
        log.info('Retrieving separated audio %d from hark', srcID)
        # Binary mode because the payload is audio data.
        # NOTE(review): assumes the client writes bytes to the handle - confirm.
        with open(file_name, 'wb') as write_handle:
            self.client.getSeparatedAudio(handle=write_handle,
                                          srcID=srcID)

    def delete_session(self):
        """Delete the client's current HARK session."""
        log.info('Deleting hark session %s', self.client.getSessionID())
        self.client.deleteSession()

    def upload_file(self, file_handle):
        """Upload an already-open file to the current HARK session."""
        log.info('Uploading file %s to hark', file_handle.name)
        self.client.uploadFile(file_handle)
# Wrapper around SpeechRecognition
class Speech:
    """Speech-to-text helper built on a ``speech_recognition.Recognizer``."""

    def __init__(self):
        """Read the API key from ``bingauth.json`` and create the recognizer."""
        log.info('Initializing speech client')
        # Context manager so the credentials file is closed right away.
        with open('bingauth.json') as auth_file:
            self.auth = str(json.load(auth_file)['apikey'])
        self.client = speech_recognition.Recognizer()

    def translate(self, file_name):
        """Return the transcription of an audio file.

        Args:
            file_name: audio file path; joined onto this module's directory,
                so an absolute path is used unchanged.

        Returns:
            The recognized text, or ``'Inaudible'`` when the recognizer
            reports that it could not understand the audio.
        """
        transcription = 'Inaudible'
        module_dir = os.path.dirname(os.path.realpath(__file__))
        audio_path = os.path.join(module_dir, file_name)
        with speech_recognition.AudioFile(audio_path) as source:
            audio = self.client.record(source)
        log.info('Sending separated audio file to speech api')
        try:
            transcription = self.client.recognize_bing(audio,
                                                       key=self.auth,
                                                       language=LANGUAGE)
        # This exception should pass if there is no transcription.
        except speech_recognition.UnknownValueError:
            pass
        log.info('Transcription: %s', transcription)
        return transcription
class WebSocketHandler(tornado.websocket.WebSocketHandler):
    """Push HARK utterances to the browser, then transcriptions, then close."""

    # Allow cross-origin web socket connections
    def check_origin(self, origin):
        """Accept the connection whatever its ``origin``."""
        return True

    # Invoked when socket closed by either end
    def on_close(self):
        """Clean the staging area and HARK session once the socket is closed.

        Implemented as ``on_close`` rather than an ``on_connection_close``
        override so the base class still runs its own connection teardown.
        """
        log.info('Web socket connection closed')
        clean_staging()

    # Invoked when socket is opened
    def open(self):
        """Start polling HARK for results for this connection."""
        log.info('Web socket connection established')
        # Do not hold packets for bandwidth optimization
        self.set_nodelay(True)
        # srcIDs already written to this socket; kept per connection so one
        # client's progress never leaks into another's.
        self._sent_ids = []
        # ioloop to wait before attempting to sending data
        tornado.ioloop.IOLoop.instance().add_timeout(timedelta(seconds=0),
                                                     self.send_data)

    def send_data(self, utterances_memo=None):
        """Send new utterances; when HARK is finished, send transcriptions.

        Args:
            utterances_memo: list of srcIDs already sent. Defaults to this
                connection's own list (it used to be a single list shared by
                every connection through a mutable default argument).
        """
        if utterances_memo is None:
            utterances_memo = getattr(self, '_sent_ids', None)
            if utterances_memo is None:
                utterances_memo = self._sent_ids = []
        if not hark.client.getSessionID():
            return
        results = hark.client.getResults()
        utterances = results['context']
        # If result contains more utterances than memo
        if len(utterances) > len(utterances_memo):
            # Must iterate since new utterances
            # could be anywhere in the result
            for utterance in utterances:
                utterance_id = utterance['srcID']
                # If utterance is new
                if utterance_id not in utterances_memo:
                    # Memoize the srcID
                    utterances_memo.append(utterance_id)
                    self.write_message(json.dumps(utterance))
                    log.info("Utterance %d written to socket", utterance_id)
        if not hark.client.isFinished():
            # NOTE(review): SOURCE lost its indentation; the re-poll is taken
            # to be the else-branch of the isFinished() check - confirm.
            tornado.ioloop.IOLoop.instance().add_timeout(
                timedelta(seconds=1), self.send_data)
            return
        # If we have all the utterances, transcribe, then close the socket
        if sum(results['scene']['numSounds'].values()) != len(utterances_memo):
            return
        # Walk the utterances themselves, in srcID order, so the audio that is
        # fetched always belongs to the utterance whose guid labels it.
        for utterance in sorted(utterances, key=lambda item: item['srcID']):
            srcID = utterance['srcID']
            random_string = ''.join(choice(ascii_uppercase) for _ in range(10))
            file_name = '{0}{1}_part{2}.flac'.format(
                STAGING_AREA, random_string, srcID)
            hark.get_audio(srcID, file_name)
            transcription = speech.translate(file_name)
            seconds, milliseconds = divmod(utterance['startTimeMs'], 1000)
            minutes, seconds = divmod(seconds, 60)
            self.write_message(json.dumps(
                '{0} at ({1}:{2}:{3}):'.format(
                    utterance['guid'], minutes, seconds, milliseconds)))
            self.write_message(json.dumps(transcription, ensure_ascii=False))
        del utterances_memo[:]
        self.close()
def clean_staging():
    """Empty the staging area, then delete the HARK session if one exists."""
    remove_all(STAGING_AREA)
    session_id = hark.client.getSessionID()
    if not session_id:
        return
    hark.delete_session()
def get_app():
    """Build the Tornado application: page, web socket and favicon routes."""
    favicon_options = {'path': settings['static_path']}
    routes = [
        (r'/', HttpRequestHandler),
        (r'/websocket', WebSocketHandler),
        (r'/favicon.ico', tornado.web.StaticFileHandler, favicon_options),
    ]
    return tornado.web.Application(routes, **settings)
def remove_all(dir):
    """Delete the files directly inside ``dir``, leaving subdirectories alone.

    Args:
        dir: directory to empty; works with or without a trailing separator.

    A file that cannot be deleted is logged and skipped, so one failure does
    not abort the rest of the cleanup.
    """
    log.info('Deleting contents of %s', dir)
    for name in os.listdir(dir):
        path = os.path.join(dir, name)
        # os.remove() cannot delete directories; skip them instead of raising.
        if os.path.isdir(path) and not os.path.islink(path):
            continue
        try:
            os.remove(path)
        except OSError as error:
            log.warning('Could not delete %s: %s', path, error)
if __name__ == '__main__':
    # Service clients are created as module globals: the request handlers and
    # clean_staging() look up `hark` and `speech` by name.
    hark = Hark()
    speech = Speech()

    # Run the staging/session cleanup when the interpreter exits.
    atexit.register(clean_staging)

    log.info('Initializing web application')
    app = get_app()
    app.listen(LISTEN_PORT)

    # Blocks until the IO loop is stopped.
    io_loop = tornado.ioloop.IOLoop.instance()
    io_loop.start()