refact: support multiple audio types

WilliamSilveiraF · Oct 26, 2023 · cd2e610 · cd2e610
1 parent 4b58da2
commit cd2e610
Show file tree

Hide file tree

Showing 7 changed files with 59 additions and 9 deletions.
diff --git a/api/routes/audio.py b/api/routes/audio.py
@@ -10,10 +10,6 @@
 
 AUDIO_PATH = "uploaded_audio/"
 
-@router.get("/")
-def read_root():
-    return {"Audio":"OK"}
-
 @router.post("/upload/")
 async def upload_audio(
     file: UploadFile = File(...),
@@ -28,7 +24,7 @@ async def upload_audio(
         with open(path_to_audio, 'wb') as f:
             f.write(data)
 
-        transcription = transcribe_audio.transcribe_audio_content(path_to_audio)
+        transcription = transcribe_audio.transcribe_audio_content(path_to_audio, 'latest_short')
         summary_text = summary.generate_summary(transcription)
         sentiment_scores = sentiment_calculator.sentiment_score(transcription)
 

diff --git a/services/transcribe_audio.py b/services/transcribe_audio.py
@@ -1,18 +1,44 @@
 from google.cloud import speech
+from pydub.utils import mediainfo
 
-def transcribe_audio_content(speech_file: str) -> speech.RecognizeResponse:
+def map_audio_properties_to_encoding(audio_properties):
+    """Return the Speech ``AudioEncoding`` member for a file's codec/container.
+
+    Args:
+        audio_properties: Mapping whose ``'codec_name'`` and ``'format_name'``
+            entries are read; a missing or empty entry is treated as ``''``.
+
+    Returns:
+        The ``speech.RecognitionConfig.AudioEncoding`` attribute named by the
+        matching table entry, or ``ENCODING_UNSPECIFIED`` when nothing matches.
+    """
+    # A ``None`` container marks an entry that applies whatever the container
+    # is; only opus needs the container to pick OGG_OPUS versus WEBM_OPUS.
+    # NOTE(review): ffprobe may spell some codecs differently (e.g. 'pcm_mulaw',
+    # 'amr_nb', 'amr_wb') - confirm these keys against real mediainfo() output.
+    encoding_map = {
+        ('pcm_s16le', None): 'LINEAR16',
+        ('flac', None): 'FLAC',
+        ('mulaw', None): 'MULAW',
+        ('amr-nb', None): 'AMR',
+        ('amr-wb', None): 'AMR_WB',
+        ('opus', 'ogg'): 'OGG_OPUS',
+        ('speex', None): 'SPEEX_WITH_HEADER_BYTE',
+        ('opus', 'webm'): 'WEBM_OPUS',
+    }
+
+    codec = (audio_properties.get('codec_name') or '').lower()
+    # NOTE(review): format_name is assumed to possibly hold several
+    # comma-separated names (e.g. 'matroska,webm'), so each is tried - confirm.
+    containers = (audio_properties.get('format_name') or '').lower().split(',')
+
+    # Container-specific entries win; the (codec, None) wildcard is tried last.
+    # The wildcard has to be looked up explicitly: a tuple built from
+    # format_name always carries a string, so it never equals a None-keyed entry.
+    candidates = [(codec, name.strip()) for name in containers]
+    candidates.append((codec, None))
+    encoding_str = next(
+        (encoding_map[key] for key in candidates if key in encoding_map),
+        'ENCODING_UNSPECIFIED',
+    )
+    return getattr(speech.RecognitionConfig.AudioEncoding, encoding_str)
+
+
+def transcribe_audio_content(speech_file: str, model: str) -> str:
 
     client = speech.SpeechClient()
+
+    audio_properties = mediainfo(speech_file)
+    encoding = map_audio_properties_to_encoding(audio_properties)
 
     with open(speech_file, "rb") as audio_file:
         content = audio_file.read()
 
     audio = speech.RecognitionAudio(content=content)
 
-    config = speech.RecognitionConfig( # TODO SUPPORT DIFFERENT TYPES OF AUDIO
-        encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
-        sample_rate_hertz=16000,
+    config = speech.RecognitionConfig(
+        encoding=encoding,
+        sample_rate_hertz=int(audio_properties['sample_rate']),
         language_code="en-US",
+        model=model
     )
 
     response = client.recognize(config=config, audio=audio)

diff --git a/static/test_transcribe.flac b/static/test_transcribe.flac
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/test_sentiment_calculator.py b/tests/test_sentiment_calculator.py
@@ -0,0 +1,16 @@
+from services.sentiment_calculator import sentiment_score
+
+def test_sentiment_scores_1():
+    """Check "I really like this project" scores positive > neutral > negative."""
+    result = sentiment_score("I really like this project")
+    assert result['positive_score'] > result['neutral_score'] > result['negative_score']
+
+def test_sentiment_scores_2():
+    """Check "I don't like lettuce" scores positive < neutral < negative."""
+    result = sentiment_score("I don't like lettuce")
+    assert result['positive_score'] < result['neutral_score'] < result['negative_score']
+
+def test_sentiment_scores_3():
+    """Check a plain temperature statement scores neutral above both extremes."""
+    result = sentiment_score("The temperature today is 20 degrees Celsius.")
+    assert result['positive_score'] < result['neutral_score'] > result['negative_score']
diff --git a/tests/test_summary.py b/tests/test_summary.py
@@ -0,0 +1,6 @@
+from services.summary import generate_summary
+
+def test_generate_summary():
+    """Check the summary of a FastAPI description still mentions "fastapi"."""
+    source_text = "FastAPI is a modern, fast (high-performance), web framework for building APIs with Python 3.7+ based on standard Python type hints."
+    lowered_summary = generate_summary(source_text).lower()
+    assert "fastapi" in lowered_summary
diff --git a/tests/test_transcribe_audio.py b/tests/test_transcribe_audio.py
@@ -0,0 +1,6 @@
+from services.transcribe_audio import transcribe_audio_content
+
+def test_transcribe_audio_content():
+    """Check the bundled FLAC sample transcribes to text containing "slushy"."""
+    result = transcribe_audio_content(
+        speech_file="static/test_transcribe.flac",
+        model='default',
+    )
+    assert "slushy" in result.lower()