Spaces:

Abbas0786
/

s22t

Sleeping

App Files Files Community

s22t / app.py

Abbas0786

Create app.py

37717e6 verified 12 months ago

raw

history blame

2.24 kB

	import os
	import gradio as gr
	import whisper
	from gtts import gTTS
	import io
	from groq import Groq
	import time

	# Read the Groq API key from the environment so the secret never lives in
	# source control. Any key that was previously committed to this file must be
	# treated as compromised and revoked.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "").strip()
	if not GROQ_API_KEY:
	    raise ValueError("GROQ_API_KEY is not set in environment variables.")

	# Initialize the Groq client
	client = Groq(api_key=GROQ_API_KEY)

	# Load the Whisper model once at import time; process_audio reuses it.
	# NOTE(review): confirm the "base" checkpoint handles Urdu well enough;
	# otherwise choose a more suitable model.
	model = whisper.load_model("base")

	def process_audio(file_path):
	    """Transcribe Urdu speech, ask Groq for a reply, and voice the reply.

	    Args:
	        file_path: Path of the audio file to transcribe, as delivered by the
	            Gradio audio input. May be ``None``/empty when no audio is set.

	    Returns:
	        A ``(text, audio_path)`` tuple. On success ``text`` is the chat reply
	        and ``audio_path`` is the path of an MP3 file with the spoken reply.
	        On any failure ``text`` is an "An error occurred: ..." message and
	        ``audio_path`` is ``None``.
	    """
	    import tempfile

	    # The input can be empty (e.g. the audio widget was cleared); answer
	    # with a readable message instead of a low-level loader error.
	    if not file_path:
	        return "An error occurred: no audio input was provided.", None

	    try:
	        # Load the audio file
	        audio = whisper.load_audio(file_path)

	        # Transcribe the audio using Whisper, forcing Urdu ('ur')
	        result = model.transcribe(audio, language="ur")
	        text = result["text"]

	        # Nothing was recognized: skip the chat request entirely.
	        if not text.strip():
	            return "An error occurred: no speech was detected in the audio.", None

	        # Generate a response using Groq
	        # NOTE(review): confirm this model can handle Urdu.
	        chat_completion = client.chat.completions.create(
	            messages=[{"role": "user", "content": text}],
	            model="llama3-8b-8192",
	        )

	        # Access the response using dot notation
	        response_message = chat_completion.choices[0].message.content.strip()

	        # Synthesize into memory first, so a failed synthesis leaves no
	        # partial file behind on disk.
	        tts = gTTS(response_message, lang="ur")
	        response_audio_io = io.BytesIO()
	        tts.write_to_fp(response_audio_io)

	        # A temp file gets a collision-free name; a second-resolution
	        # timestamp would let simultaneous requests overwrite each other.
	        # delete=False keeps the file so the caller can read it by path.
	        with tempfile.NamedTemporaryFile(
	            prefix="response_", suffix=".mp3", delete=False
	        ) as audio_file:
	            audio_file.write(response_audio_io.getvalue())
	            response_audio_path = audio_file.name

	        # Return the response text and the path to the saved audio file
	        return response_message, response_audio_path

	    except Exception as e:
	        # UI boundary: report the failure as text rather than raising.
	        return f"An error occurred: {e}", None

	# Gradio UI: one audio input handed to process_audio as a file path, and two
	# outputs (text box + audio player) for the pair of values it returns.
	iface = gr.Interface(
	    fn=process_audio,
	    inputs=gr.Audio(type="filepath"),  # Use type="filepath"
	    outputs=[
	        gr.Textbox(label="Response Text (Urdu)"),
	        gr.Audio(label="Response Audio (Urdu)"),
	    ],
	    live=True,  # Set to False if you do not need real-time updates
	)

	# Start the server only when this file is run as a script, so importing the
	# module (for example from a test) does not launch it as a side effect.
	if __name__ == "__main__":
	    iface.launch()