From 7ff3a31e72258f7207e8afdef84e777d54aa3a54 Mon Sep 17 00:00:00 2001
From: ManishMadan2882
Date: Tue, 29 Oct 2024 03:11:51 +0530
Subject: [PATCH] (feat:TTS) create gtts over abstraction

---
Review note: the added lines below were revised during review, so the
blob ids on the `index` lines still refer to the original commit.

 application/api/user/routes.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 application/requirements.txt   |  1 +
 application/tts/base.py        | 12 ++++++++++++
 application/tts/google_tts.py  | 31 +++++++++++++++++++++++++++++++
 4 files changed, 86 insertions(+)
 create mode 100644 application/tts/base.py
 create mode 100644 application/tts/google_tts.py

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 3f1a72187..794c69d4a 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -17,6 +17,7 @@
 from application.extensions import api
 from application.utils import check_required_fields
 from application.vectorstore.vector_creator import VectorCreator
+from application.tts.google_tts import GoogleTTS
 
 mongo = MongoClient(settings.MONGO_URI)
 db = mongo["docsgpt"]
@@ -1663,3 +1664,44 @@ def post(self):
             return make_response(jsonify({"success": False, "error": str(err)}), 400)
 
         return make_response(jsonify({"success": True}), 200)
+
+
+@user_ns.route("/api/tts")
+class TextToSpeech(Resource):
+    # Request schema: a single required "text" field holding the text to speak.
+    tts_model = api.model(
+        "TextToSpeechModel",
+        {
+            "text": fields.String(required=True, description="Text to be synthesized as audio"),
+        },
+    )
+
+    @api.expect(tts_model)
+    @api.doc(description="Synthesize audio speech from text")
+    def post(self):
+        # Reject a missing, non-string, or blank "text" up front so a bad
+        # payload yields a 400 response instead of an unhandled KeyError/TypeError.
+        data = request.get_json()
+        text = data.get("text") if isinstance(data, dict) else None
+        if not isinstance(text, str) or not text.strip():
+            return make_response(
+                jsonify({"success": False, "error": "Missing required field: text"}),
+                400,
+            )
+
+        try:
+            # Returns the base64-encoded audio and the language code used.
+            audio_base64, detected_language = GoogleTTS(text).text_to_speech()
+        except Exception as err:
+            return make_response(jsonify({"success": False, "error": str(err)}), 400)
+
+        return make_response(
+            jsonify(
+                {
+                    "success": True,
+                    "audio_base64": audio_base64,
+                    "lang": detected_language,
+                }
+            ),
+            200,
+        )
diff --git a/application/requirements.txt b/application/requirements.txt
index 6ea1d1ba2..aad629f14 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -85,3 +85,4 @@ vine==5.1.0
 wcwidth==0.2.13
 werkzeug==3.0.4
 yarl==1.11.1
+gTTS==2.3.2
diff --git a/application/tts/base.py b/application/tts/base.py
new file mode 100644
index 000000000..143bed734
--- /dev/null
+++ b/application/tts/base.py
@@ -0,0 +1,12 @@
+from abc import ABC, abstractmethod
+
+
+class BaseTTS(ABC):
+    """Abstract interface that every text-to-speech backend implements."""
+
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def text_to_speech(self, *args, **kwargs):
+        """Synthesize speech; concrete backends define arguments and result."""
diff --git a/application/tts/google_tts.py b/application/tts/google_tts.py
new file mode 100644
index 000000000..310309dc2
--- /dev/null
+++ b/application/tts/google_tts.py
@@ -0,0 +1,31 @@
+import base64
+import io
+
+from gtts import gTTS
+
+from application.tts.base import BaseTTS
+
+
+class GoogleTTS(BaseTTS):
+    """Text-to-speech backend built on the gTTS package."""
+
+    def __init__(self, text, lang="en"):
+        """Store the text to synthesize and the language code to speak it in.
+
+        ``lang`` is passed to gTTS as its language code and defaults to "en".
+        """
+        super().__init__()
+        self.text = text
+        self.lang = lang
+
+    def text_to_speech(self):
+        """Return ``(audio_base64, lang)`` for the stored text.
+
+        ``audio_base64`` is the audio written by gTTS, base64-encoded and
+        decoded to a UTF-8 string.
+        """
+        audio_fp = io.BytesIO()
+        gTTS(text=self.text, lang=self.lang, slow=False).write_to_fp(audio_fp)
+        # getvalue() returns the whole buffer, so no seek(0)/read() is needed.
+        audio_base64 = base64.b64encode(audio_fp.getvalue()).decode("utf-8")
+        return audio_base64, self.lang