-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathvoice_detection.py
63 lines (49 loc) · 2.02 KB
/
voice_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import time
from google.cloud import speech_v1
from google.cloud.speech_v1 import types
import pyaudio
import sys

# The Google Cloud API key is read from the "key" environment variable, which
# must be exported before this script starts (e.g. `export key=...`).
# A key must never be hard-coded in source or echoed to stdout: anyone who can
# read the repository or the logs could use it. Any key that was previously
# committed here should be treated as compromised and revoked/rotated.
if not os.environ.get("key"):
    print(
        'warning: the "key" environment variable is not set; '
        "export a Google Cloud API key in it before starting recognition",
        file=sys.stderr,
    )
def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-item insertions, deletions
    and substitutions needed to turn one sequence into the other. Only two
    rows of the dynamic-programming table are kept, and the shorter input is
    used as the row axis so each row has ``min(len(s1), len(s2)) + 1`` cells.
    """
    shorter, longer = (s2, s1) if len(s1) > len(s2) else (s1, s2)

    # Row 0: cost of building each prefix of `shorter` from an empty sequence.
    previous_row = list(range(len(shorter) + 1))
    for row_number, longer_item in enumerate(longer, start=1):
        # First cell: cost of deleting the first `row_number` items of `longer`.
        current_row = [row_number]
        for column, shorter_item in enumerate(shorter):
            if shorter_item == longer_item:
                # Items match: carry the diagonal cost over unchanged.
                cost = previous_row[column]
            else:
                # Cheapest of substitution, deletion, insertion, plus one edit.
                cost = 1 + min(
                    previous_row[column],
                    previous_row[column + 1],
                    current_row[-1],
                )
            current_row.append(cost)
        previous_row = current_row
    return previous_row[-1]
def most_similar_word(target_word, word_list):
    """Return the entry of ``word_list`` closest to ``target_word``.

    Closeness is measured with ``levenshtein_distance``. When several entries
    share the smallest distance, the first one in ``word_list`` is returned.
    An empty ``word_list`` raises ``ValueError`` (from ``min``).
    """
    def distance_to_target(candidate):
        return levenshtein_distance(target_word, candidate)

    return min(word_list, key=distance_to_target)
def transcribe_stream_with_word_level_confidence():
    """Stream microphone audio to Google Cloud Speech and publish keyword matches.

    Audio is captured with PyAudio as 16-bit mono at 16 kHz in 1024-frame
    chunks and sent to the streaming recognition API (en-US, interim results
    and word confidence enabled). For every word in the top alternative of
    each result, the closer of "hellfire" / "recall" (by edit distance) is
    printed and written to ``/tmp/sync``, overwriting the previous content.

    The API key is read from the ``key`` environment variable.

    Raises:
        KeyError: if the ``key`` environment variable is not set.
    """
    client = speech_v1.SpeechClient(client_options={"api_key": os.environ['key']})
    config = types.RecognitionConfig(
        encoding=speech_v1.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
        enable_word_confidence=True
    )
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True
    )
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
        try:
            stream.start_stream()
            # Lazily read microphone chunks; the generator ends only if a read
            # returns an empty bytes object.
            requests = (
                types.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in iter(lambda: stream.read(1024), b"")
            )
            responses = client.streaming_recognize(streaming_config, requests)
            for response in responses:
                for result in response.results:
                    # Skip results that carry no alternatives instead of
                    # failing with IndexError on alternatives[0].
                    if not result.alternatives:
                        continue
                    for word_info in result.alternatives[0].words:
                        sim = most_similar_word(word_info.word, ["hellfire", "recall"])
                        print(sim)
                        # Context manager so the handle is flushed and closed
                        # on every write rather than left to the garbage collector.
                        with open("/tmp/sync", "w") as sync_file:
                            sync_file.write(sim)
        finally:
            # Release the audio device even if recognition fails or the user
            # interrupts the program.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
# Script entry point: start streaming recognition immediately. The call is
# unguarded, so it also runs if this module is imported.
transcribe_stream_with_word_level_confidence()