Skip to content

Commit

Permalink
refact: support multiple audio types
Browse files Browse the repository at this point in the history
  • Loading branch information
WilliamSilveiraF committed Oct 26, 2023
1 parent 4b58da2 commit cd2e610
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 9 deletions.
6 changes: 1 addition & 5 deletions api/routes/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@

AUDIO_PATH = "uploaded_audio/"

@router.get("/")
def read_root():
    """Return the fixed ``{"Audio": "OK"}`` payload for GET requests on the router root."""
    payload = {"Audio": "OK"}
    return payload

@router.post("/upload/")
async def upload_audio(
file: UploadFile = File(...),
Expand All @@ -28,7 +24,7 @@ async def upload_audio(
with open(path_to_audio, 'wb') as f:
f.write(data)

transcription = transcribe_audio.transcribe_audio_content(path_to_audio)
transcription = transcribe_audio.transcribe_audio_content(path_to_audio, 'latest_short')
summary_text = summary.generate_summary(transcription)
sentiment_scores = sentiment_calculator.sentiment_score(transcription)

Expand Down
34 changes: 30 additions & 4 deletions services/transcribe_audio.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,44 @@
from google.cloud import speech
from pydub.utils import mediainfo

def transcribe_audio_content(speech_file: str) -> speech.RecognizeResponse:
def map_audio_properties_to_encoding(audio_properties):
    """Map probed audio metadata to a Speech-to-Text ``AudioEncoding`` member.

    Args:
        audio_properties: Mapping of probed media metadata. Only the
            ``codec_name`` and ``format_name`` keys are read; missing or
            empty values are treated as unknown.

    Returns:
        The ``speech.RecognitionConfig.AudioEncoding`` member selected by the
        table below, or ``ENCODING_UNSPECIFIED`` when no entry matches.
    """
    # Keys are (codec, container). A container of None means "any container":
    # it is used as the fallback once no container-specific entry matched.
    encoding_map = {
        ('pcm_s16le', None): 'LINEAR16',
        ('flac', None): 'FLAC',
        ('mulaw', None): 'MULAW',
        ('amr-nb', None): 'AMR',
        ('amr-wb', None): 'AMR_WB',
        ('opus', 'ogg'): 'OGG_OPUS',
        ('speex', None): 'SPEEX_WITH_HEADER_BYTE',
        ('opus', 'webm'): 'WEBM_OPUS',
    }
    # NOTE(review): the codec spellings above should be checked against what
    # the probe actually reports for mu-law and AMR files.

    # `or ''` also covers keys that are present but hold None.
    codec = (audio_properties.get('codec_name') or '').lower()
    raw_container = (audio_properties.get('format_name') or '').lower()

    # The probe may report several comma-separated aliases for one container
    # (for example "matroska,webm"), so every alias is tried in turn.
    containers = [alias.strip() for alias in raw_container.split(',') if alias.strip()]

    # Container-specific entries win; (codec, None) is the generic fallback.
    # Without this fallback the None-keyed entries could never match, because
    # the probed container is always a string.
    candidate_keys = [(codec, container) for container in containers]
    candidate_keys.append((codec, None))

    encoding_str = 'ENCODING_UNSPECIFIED'
    for key in candidate_keys:
        if key in encoding_map:
            encoding_str = encoding_map[key]
            break

    return getattr(speech.RecognitionConfig.AudioEncoding, encoding_str)


def transcribe_audio_content(speech_file: str, model: str) -> str:

client = speech.SpeechClient()

audio_properties = mediainfo(speech_file)
encoding = map_audio_properties_to_encoding(audio_properties)

with open(speech_file, "rb") as audio_file:
content = audio_file.read()

audio = speech.RecognitionAudio(content=content)

config = speech.RecognitionConfig( # TODO SUPPORT DIFFERENT TYPES OF AUDIO
encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
sample_rate_hertz=16000,
config = speech.RecognitionConfig(
encoding=encoding,
sample_rate_hertz=int(audio_properties['sample_rate']),
language_code="en-US",
model=model
)

response = client.recognize(config=config, audio=audio)
Expand Down
Binary file added static/test_transcribe.flac
Binary file not shown.
Empty file added tests/__init__.py
Empty file.
16 changes: 16 additions & 0 deletions tests/test_sentiment_calculator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from services.sentiment_calculator import sentiment_score

def test_sentiment_scores_1():
    """A clearly positive sentence must rank positive > neutral > negative."""
    result = sentiment_score("I really like this project")
    assert (
        result['positive_score'] > result['neutral_score']
        and result['neutral_score'] > result['negative_score']
    )

def test_sentiment_scores_2():
    """A clearly negative sentence must rank positive < neutral < negative."""
    result = sentiment_score("I don't like lettuce")
    assert (
        result['positive_score'] < result['neutral_score']
        and result['neutral_score'] < result['negative_score']
    )

def test_sentiment_scores_3():
    """A factual sentence must score neutral above both positive and negative."""
    result = sentiment_score("The temperature today is 20 degrees Celsius.")
    assert (
        result['positive_score'] < result['neutral_score']
        and result['neutral_score'] > result['negative_score']
    )
6 changes: 6 additions & 0 deletions tests/test_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from services.summary import generate_summary

def test_generate_summary():
    """The summary of a sentence about FastAPI must still mention it (case-insensitive)."""
    source_text = "FastAPI is a modern, fast (high-performance), web framework for building APIs with Python 3.7+ based on standard Python type hints."
    lowered_summary = generate_summary(source_text).lower()
    assert "fastapi" in lowered_summary
6 changes: 6 additions & 0 deletions tests/test_transcribe_audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from services.transcribe_audio import transcribe_audio_content

def test_transcribe_audio_content():
    """Transcribe the bundled FLAC sample and check for an expected word.

    The sample path is resolved from this test file's location rather than
    from the current working directory, so the result does not depend on the
    directory pytest is launched from.
    """
    # Local import keeps this block self-contained within the test module.
    from pathlib import Path

    # tests/ and static/ are sibling directories at the repository root.
    repo_root = Path(__file__).resolve().parent.parent
    audio_file_path = repo_root / "static" / "test_transcribe.flac"

    transcript = transcribe_audio_content(speech_file=str(audio_file_path), model='default')
    assert "slushy" in transcript.lower()

0 comments on commit cd2e610

Please sign in to comment.