Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(code quality): add black and ruff #56

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
__pycache__/
*.wav
keys.py
.venv/
.venv/
.DS_Store
ecout_env
44 changes: 30 additions & 14 deletions AudioRecorder.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import custom_speech_recognition as sr
import pyaudiowpatch as pyaudio
from datetime import datetime

import pyaudiowpatch as pyaudio

import custom_speech_recognition as sr

RECORD_TIMEOUT = 3
ENERGY_THRESHOLD = 1000
DYNAMIC_ENERGY_THRESHOLD = False


class BaseRecorder:
def __init__(self, source, source_name):
self.recorder = sr.Recognizer()
Expand All @@ -21,35 +24,48 @@ def adjust_for_noise(self, device_name, msg):
print(f"[INFO] Completed ambient noise adjustment for {device_name}.")

def record_into_queue(self, audio_queue):
def record_callback(_, audio:sr.AudioData) -> None:
def record_callback(_, audio: sr.AudioData) -> None:
data = audio.get_raw_data()
audio_queue.put((self.source_name, data, datetime.utcnow()))

self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=RECORD_TIMEOUT)
self.recorder.listen_in_background(
self.source, record_callback, phrase_time_limit=RECORD_TIMEOUT
)


class DefaultMicRecorder(BaseRecorder):
def __init__(self):
super().__init__(source=sr.Microphone(sample_rate=16000), source_name="You")
self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")
self.adjust_for_noise(
"Default Mic", "Please make some noise from the Default Mic..."
)


class DefaultSpeakerRecorder(BaseRecorder):
def __init__(self):
with pyaudio.PyAudio() as p:
wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])

default_speakers = p.get_device_info_by_index(
wasapi_info["defaultOutputDevice"]
)

if not default_speakers["isLoopbackDevice"]:
for loopback in p.get_loopback_device_info_generator():
if default_speakers["name"] in loopback["name"]:
default_speakers = loopback
break
else:
print("[ERROR] No loopback device found.")

source = sr.Microphone(speaker=True,
device_index= default_speakers["index"],
sample_rate=int(default_speakers["defaultSampleRate"]),
chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
channels=default_speakers["maxInputChannels"])

source = sr.Microphone(
speaker=True,
device_index=default_speakers["index"],
sample_rate=int(default_speakers["defaultSampleRate"]),
chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
channels=default_speakers["maxInputChannels"],
)
super().__init__(source=source, source_name="Speaker")
self.adjust_for_noise("Default Speaker", "Please make or play some noise from the Default Speaker...")
self.adjust_for_noise(
"Default Speaker",
"Please make or play some noise from the Default Speaker...",
)
66 changes: 39 additions & 27 deletions AudioTranscriber.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
import whisper
import torch
import wave
import io
import os
import threading
from tempfile import NamedTemporaryFile
import custom_speech_recognition as sr
import io
import wave
from datetime import timedelta
import pyaudiowpatch as pyaudio
from heapq import merge
from tempfile import NamedTemporaryFile

import pyaudiowpatch as pyaudio

import custom_speech_recognition as sr

PHRASE_TIMEOUT = 3.05

MAX_PHRASES = 10


class AudioTranscriber:
def __init__(self, mic_source, speaker_source, model):
self.transcript_data = {"You": [], "Speaker": []}
Expand All @@ -24,20 +25,20 @@ def __init__(self, mic_source, speaker_source, model):
"sample_rate": mic_source.SAMPLE_RATE,
"sample_width": mic_source.SAMPLE_WIDTH,
"channels": mic_source.channels,
"last_sample": bytes(),
"last_sample": b"",
"last_spoken": None,
"new_phrase": True,
"process_data_func": self.process_mic_data
"process_data_func": self.process_mic_data,
},
"Speaker": {
"sample_rate": speaker_source.SAMPLE_RATE,
"sample_width": speaker_source.SAMPLE_WIDTH,
"channels": speaker_source.channels,
"last_sample": bytes(),
"last_sample": b"",
"last_spoken": None,
"new_phrase": True,
"process_data_func": self.process_speaker_data
}
"process_data_func": self.process_speaker_data,
},
}

def transcribe_audio_queue(self, audio_queue):
Expand All @@ -46,7 +47,7 @@ def transcribe_audio_queue(self, audio_queue):
self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
source_info = self.audio_sources[who_spoke]

text = ''
text = ""
temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
temp_file.close()

Expand All @@ -55,29 +56,35 @@ def transcribe_audio_queue(self, audio_queue):

os.unlink(temp_file.name)

if text != '' and text.lower() != 'you':
if text != "" and text.lower() != "you":
self.update_transcript(who_spoke, text, time_spoken)
self.transcript_changed_event.set()

def update_last_sample_and_phrase_status(self, who_spoke, data, time_spoken):
source_info = self.audio_sources[who_spoke]
if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=PHRASE_TIMEOUT):
source_info["last_sample"] = bytes()
if source_info["last_spoken"] and time_spoken - source_info[
"last_spoken"
] > timedelta(seconds=PHRASE_TIMEOUT):
source_info["last_sample"] = b""
source_info["new_phrase"] = True
else:
source_info["new_phrase"] = False

source_info["last_sample"] += data
source_info["last_spoken"] = time_spoken
source_info["last_spoken"] = time_spoken

def process_mic_data(self, data, temp_file_name):
audio_data = sr.AudioData(data, self.audio_sources["You"]["sample_rate"], self.audio_sources["You"]["sample_width"])
audio_data = sr.AudioData(
data,
self.audio_sources["You"]["sample_rate"],
self.audio_sources["You"]["sample_width"],
)
wav_data = io.BytesIO(audio_data.get_wav_data())
with open(temp_file_name, 'w+b') as f:
with open(temp_file_name, "w+b") as f:
f.write(wav_data.read())

def process_speaker_data(self, data, temp_file_name):
with wave.open(temp_file_name, 'wb') as wf:
with wave.open(temp_file_name, "wb") as wf:
wf.setnchannels(self.audio_sources["Speaker"]["channels"])
p = pyaudio.PyAudio()
wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
Expand All @@ -96,18 +103,23 @@ def update_transcript(self, who_spoke, text, time_spoken):
transcript[0] = (f"{who_spoke}: [{text}]\n\n", time_spoken)

def get_transcript(self):
combined_transcript = list(merge(
self.transcript_data["You"], self.transcript_data["Speaker"],
key=lambda x: x[1], reverse=True))
combined_transcript = list(
merge(
self.transcript_data["You"],
self.transcript_data["Speaker"],
key=lambda x: x[1],
reverse=True,
)
)
combined_transcript = combined_transcript[:MAX_PHRASES]
return "".join([t[0] for t in combined_transcript])

def clear_transcript_data(self):
self.transcript_data["You"].clear()
self.transcript_data["Speaker"].clear()

self.audio_sources["You"]["last_sample"] = bytes()
self.audio_sources["Speaker"]["last_sample"] = bytes()
self.audio_sources["You"]["last_sample"] = b""
self.audio_sources["Speaker"]["last_sample"] = b""

self.audio_sources["You"]["new_phrase"] = True
self.audio_sources["Speaker"]["new_phrase"] = True
self.audio_sources["Speaker"]["new_phrase"] = True
38 changes: 22 additions & 16 deletions GPTResponder.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
import time

import openai
from keys import OPENAI_API_KEY
from prompts import create_prompt, INITIAL_RESPONSE
import time

from prompts import INITIAL_RESPONSE, create_prompt

openai.api_key = OPENAI_API_KEY


def generate_response_from_transcript(transcript):
try:
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo-0301",
messages=[{"role": "system", "content": create_prompt(transcript)}],
temperature = 0.0
model="gpt-3.5-turbo-0301",
messages=[{"role": "system", "content": create_prompt(transcript)}],
temperature=0.0,
)
except Exception as e:
print(e)
return ''
return ""
full_response = response.choices[0].message.content
try:
return full_response.split('[')[1].split(']')[0]
except:
return ''

return full_response.split("[")[1].split("]")[0]
except IndexError:
return ""


class GPTResponder:
def __init__(self):
self.response = INITIAL_RESPONSE
Expand All @@ -31,14 +35,16 @@ def respond_to_transcriber(self, transcriber):
if transcriber.transcript_changed_event.is_set():
start_time = time.time()

transcriber.transcript_changed_event.clear()
transcriber.transcript_changed_event.clear()
transcript_string = transcriber.get_transcript()
response = generate_response_from_transcript(transcript_string)

end_time = time.time() # Measure end time
execution_time = end_time - start_time # Calculate the time it took to execute the function

if response != '':

# Calculate the time it took to execute the function
execution_time = end_time - start_time

if response != "":
self.response = response

remaining_time = self.response_interval - execution_time
Expand All @@ -48,4 +54,4 @@ def respond_to_transcriber(self, transcriber):
time.sleep(0.3)

def update_response_interval(self, interval):
self.response_interval = interval
self.response_interval = interval
21 changes: 21 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
lint:
@echo
ruff .
@echo
black --check --diff --color .
@echo
pip-audit

format:
ruff --silent --exit-zero --fix .
black .

precommit:
make lint
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably also include instructions for installing 'make'; Chocolatey seems to be the easiest option.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe it's built in on macOS and Linux; I'm not sure about Windows.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Windows you have to install it.
Installing choco with the command in the README and then using

choco install make 

would be good enough for instructions

make format

venv:
python -m venv ecout_env

install:
pip install -r requirements.txt
Loading