# Scraped from Hugging Face Spaces: Abbas0786/s22t
# (Space status: Sleeping; original file size: 2,241 bytes)

import io
import logging
import os
import tempfile
import time

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Read the Groq API key from the environment so the secret never lives in the
# source file (on a hosted deployment, configure it as a secret/variable).
# NOTE: a key was previously committed in this file; it must be treated as
# leaked and revoked/rotated with the provider.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "").strip()
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set in environment variables.")

# Initialize the Groq client
client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper model once at import time so every request reuses it.
model = whisper.load_model("base")  # Ensure this model supports Urdu; otherwise, choose a suitable model

def process_audio(file_path):
    """Transcribe Urdu speech, get a chat reply from Groq, and voice the reply.

    Args:
        file_path: Path to the audio file to transcribe. May be ``None`` or
            empty when no audio is available (e.g. the input was cleared).

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` is the path of an MP3 file containing the
        spoken reply. When there is nothing to process, or when any step
        fails, ``text`` is a human-readable message and ``audio_path`` is
        ``None``; the function does not raise.
    """
    # Nothing to transcribe: answer immediately instead of letting the audio
    # loader fail with an opaque error.
    if not file_path:
        return "No audio input received.", None

    try:
        # Load the audio file and transcribe it as Urdu.
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio, language="ur")
        text = result["text"].strip()
        if not text:
            # An empty prompt is useless to the chat model.
            return "No speech could be transcribed from the audio.", None

        # Ask the Groq-hosted chat model for a reply to the transcription.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="gemma2-9b-it",  # Ensure this model can handle Urdu
        )
        response_message = (chat_completion.choices[0].message.content or "").strip()
        if not response_message:
            # gTTS refuses empty text, so stop before attempting synthesis.
            return "The model returned an empty response.", None

        # Convert the reply to Urdu speech, written straight into a uniquely
        # named temporary file. A second-resolution timestamp in the working
        # directory could collide between requests arriving in the same second.
        tts = gTTS(response_message, lang="ur")
        audio_file = tempfile.NamedTemporaryFile(
            prefix="response_", suffix=".mp3", delete=False
        )
        try:
            with audio_file:
                tts.write_to_fp(audio_file)
        except Exception:
            # Do not leave a partial/empty file behind when synthesis fails.
            os.unlink(audio_file.name)
            raise

        return response_message, audio_file.name

    except Exception as e:
        # UI boundary: report the failure to the user rather than crashing the
        # app, but keep the traceback in the logs for debugging.
        logging.getLogger(__name__).exception("process_audio failed")
        return f"An error occurred: {e}", None

# Input widget: the recorded/uploaded audio is handed to process_audio as a
# file path.
_audio_in = gr.Audio(type="filepath")

# Output widgets, in the same order as process_audio's (text, audio) result.
_reply_out = [
    gr.Textbox(label="Response Text (Urdu)"),
    gr.Audio(label="Response Audio (Urdu)"),
]

# Build the interface. live=True is kept from the original configuration;
# set it to False if you do not need real-time updates.
iface = gr.Interface(
    fn=process_audio,
    inputs=_audio_in,
    outputs=_reply_out,
    live=True,
)

# Start the web app.
iface.launch()