# worker.py

from openai import OpenAI
import requests
from typing import Any , Optional

# Module-level OpenAI client, created once at import time and used by
# openai_process_message below.
# NOTE(review): constructed with no arguments, so credentials presumably come
# from the environment (e.g. OPENAI_API_KEY) — confirm.
openai_client = OpenAI()


def speech_to_text(audio_binary: bytes) -> str:
    """
    Converts the input audio binary data to text using the Watson Speech-to-Text API.

    The audio bytes are sent as the body of an HTTP POST request and the JSON
    reply is searched for a transcript.

    Parameters:
        audio_binary (bytes): The binary audio data to be converted to text.

    Returns:
        str: The transcript of the last alternative of the last result in the
            response, or the string 'null' when the response has no results.
    """
    # Watson Speech-to-Text HTTP API URL
    # NOTE(review): the request goes to the bare base URL, whereas
    # text_to_speech appends an API path to the same host — confirm whether a
    # recognize path is required here.
    base_url: str = 'https://sn-watson-stt.labs.skills.network'

    # HTTP request parameters
    params: dict = {
        'model': 'en-US_Multimedia',
    }

    # Handle HTTP POST request; the raw audio bytes are the request body
    response = requests.post(base_url, params=params, data=audio_binary).json()

    # Fallback value returned when the service recognized nothing
    text: str = 'null'

    results = response.get('results')
    if results:
        print('speech to text response:', response)  # For debugging
        # Index instead of pop() so the parsed response is left unmodified
        text = results[-1].get('alternatives')[-1].get('transcript')
        print('recognized text:', text)

    return text


def text_to_speech(text: str, voice: Optional[str] = "") -> bytes:
    """
    Converts the input text to speech using the Watson Text-to-Speech API.

    Parameters:
        text (str): The text to be converted to speech.
        voice (str, optional): The voice to be used for speech synthesis.
            None, "" and "default" all mean "no preferred voice", in which
            case no voice parameter is sent. Defaults to "".

    Returns:
        bytes: The raw body of the HTTP response, expected to be the
            synthesized WAV audio. The HTTP status is not checked here.
    """
    # Watson HTTP API URL for TTS
    # NOTE(review): the host name contains "stt" although the path targets
    # text-to-speech — confirm this is the intended endpoint.
    base_url: str = 'https://sn-watson-stt.labs.skills.network'
    api_url: str = base_url + '/text-to-speech/api/v1/synthesize'

    # Query parameters are handed to requests so that values get URL-encoded
    params: dict = {'output': 'output_text.wav'}

    # Adding voice parameter only if the user has selected a preferred voice;
    # the truthiness check also covers voice=None, which the signature allows
    if voice and voice != "default":
        params['voice'] = voice

    # Headers setting for HTTP POST request
    headers: dict = {
        'Accept': 'audio/wav',
        'Content-Type': 'application/json',
    }

    # Request body
    json_data: dict = {'text': text}

    # HTTP POST request
    response = requests.post(api_url, params=params, headers=headers, json=json_data)
    print('text to speech response:', response)

    # Return the binary audio data representing the synthesized speech
    return response.content


def openai_process_message(user_message: str) -> str:
    """
    Sends the user's message to the OpenAI chat completions API and returns the reply.

    Parameters:
        user_message (str): The message sent by the user.

    Returns:
        str: The message content of the first choice in the API response.
    """
    # System instruction that frames how the model should behave
    system_instruction: str = "Act like a personal assistant. You can respond to questions, translate sentences, summarize news, and give recommendations."

    # Conversation sent to the model: system instruction first, then the user's message
    conversation: list = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": user_message},
    ]

    completion: Any = openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        max_tokens=4000,
    )
    print("openai response:", completion)

    # Only the first choice of the completion is used
    return completion.choices[0].message.content