"""Urdu voice assistant served through a Gradio interface.

Pipeline: recorded/uploaded audio -> Whisper transcription (Urdu) ->
Groq chat completion -> gTTS Urdu speech synthesis.
"""

import io
import logging
import os
import tempfile
import time

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

logger = logging.getLogger(__name__)

# The API key is read from the environment; never hard-code secrets in
# source files, where they end up in version control and shared copies.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set in environment variables.")

# Initialize the Groq client
client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper model once at start-up so every request reuses it.
# Ensure this model supports Urdu; otherwise, choose a suitable model.
model = whisper.load_model("base")


def process_audio(file_path):
    """Transcribe Urdu speech, ask the LLM for a reply, and voice the reply.

    Args:
        file_path: Path to the input audio file as supplied by the Gradio
            ``Audio`` component (``type="filepath"``). May be ``None`` or
            empty when the input is cleared.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` is the path of an MP3 file containing the
        spoken reply. On failure ``text`` is a message starting with
        ``"An error occurred: "`` and ``audio_path`` is ``None``.
    """
    # With live=True the callback can fire with no audio (e.g. input cleared).
    if not file_path:
        return "An error occurred: no audio input was provided.", None

    try:
        # Load the audio file and transcribe it, forcing Urdu ('ur').
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio, language="ur")
        text = result["text"].strip()
        if not text:
            # Nothing to send to the LLM; avoid a pointless API call.
            return "An error occurred: no speech was detected in the audio.", None

        # Generate a response using Groq
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Ensure this model can handle Urdu
        )
        response_message = (chat_completion.choices[0].message.content or "").strip()
        if not response_message:
            # gTTS cannot synthesise an empty string.
            return "An error occurred: the model returned an empty response.", None

        # Convert the response text to Urdu speech and write it directly to
        # a uniquely named temp file. A timestamp-based name can collide
        # when two requests arrive within the same second.
        tts = gTTS(response_message, lang="ur")
        with tempfile.NamedTemporaryFile(
            prefix="response_", suffix=".mp3", delete=False
        ) as audio_file:
            tts.write_to_fp(audio_file)
            response_audio_path = audio_file.name

        return response_message, response_audio_path
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        # the app, but keep the traceback in the logs for debugging.
        logger.exception("Failed to process audio %r", file_path)
        return f"An error occurred: {e}", None


iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # Use type="filepath"
    outputs=[
        gr.Textbox(label="Response Text (Urdu)"),
        gr.Audio(label="Response Audio (Urdu)"),
    ],
    live=True,  # Set to False if you do not need real-time updates
)

iface.launch()