# app.py — Voice-to-voice chatbot (Hugging Face Space by sal-maq)
# Note: the lines previously here ("raw / history blame / 4.24 kB") were
# Hugging Face web-page residue from a copy-paste, not Python, and would
# be syntax errors; they have been replaced by this comment header.
import os
import tempfile
import numpy as np
import gradio as gr
import whisper
from gtts import gTTS
from groq import Groq
import soundfile as sf
# Configure the Groq client.
# SECURITY: the API key must be supplied via the environment (e.g. a Space
# secret named GROQ_API_KEY) — never hard-coded in source. The key that was
# previously committed here is compromised and must be rotated/revoked.
groq_client = Groq(api_key=os.environ.get('GROQ_API_KEY'))

# Load the Whisper speech-to-text model once at startup ("base" trades
# accuracy for speed/memory; load is expensive, so do it at module level).
whisper_model = whisper.load_model("base")
def process_audio(audio_file_path):
    """Transcribe an uploaded audio file, generate an LLM reply, and voice it.

    Args:
        audio_file_path: Filesystem path to the user's uploaded/recorded audio
            (Gradio passes a path because the input uses type="filepath").

    Returns:
        Tuple of (response_text, response_audio_path) on success, or
        ("Error: <message>", None) on any failure so the UI shows the error
        instead of crashing.
    """
    try:
        # Guard: Gradio sends None/"" when no file was provided.
        if not audio_file_path:
            raise ValueError("No audio file provided")
        print(f"Received audio file path: {audio_file_path}")

        # Whisper can read the uploaded file directly; the previous version
        # copied its bytes into a second temporary WAV first, which was
        # redundant I/O with no behavioral benefit.
        result = whisper_model.transcribe(audio_file_path)
        user_text = result['text']
        print(f"Transcribed text: {user_text}")

        # Generate a reply with the Llama 3 8B model via the Groq API.
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": user_text,
                }
            ],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content
        print(f"Response text: {response_text}")

        # Synthesize the reply to an MP3. delete=False keeps the file on disk
        # so Gradio can serve it after this function returns. Saving happens
        # after the handle is closed to avoid writing to an open NamedTemporaryFile
        # (which fails on some platforms, e.g. Windows).
        tts = gTTS(text=response_text, lang='en')
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio_file:
            response_audio_path = temp_audio_file.name
        tts.save(response_audio_path)

        return response_text, response_audio_path
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: surface the message
        # in the textbox rather than taking down the app.
        return f"Error: {str(e)}", None
# Create Gradio interface with updated layout.
# Layout: custom CSS (injected via Markdown), a title/description, then a
# two-column row — inputs on the left, outputs on the right.
with gr.Blocks() as demo:
    # NOTE(review): CSS inside gr.Markdown relies on the HTML not being
    # sanitized; newer Gradio versions may strip <style> tags — the
    # supported route is gr.Blocks(css=...). Left as-is to preserve behavior.
    gr.Markdown(
        """
    <style>
        .gradio-container {
            font-family: Arial, sans-serif;
            background-color: #e0f7fa; /* Changed background color */
            border-radius: 10px;
            padding: 20px;
            box-shadow: 0 4px 12px rgba(0,0,0,0.2);
        }
        .gradio-input, .gradio-output {
            border-radius: 6px;
            border: 1px solid #ddd;
            padding: 10px;
        }
        .gradio-button {
            background-color: #28a745;
            color: white;
            border-radius: 6px;
            border: none;
            padding: 8px 16px; /* Adjusted padding */
            font-size: 16px; /* Adjusted font size */
        }
        .gradio-button:hover {
            background-color: #218838;
        }
        .gradio-title {
            font-size: 24px;
            font-weight: bold;
            margin-bottom: 20px;
        }
        .gradio-description {
            font-size: 14px;
            margin-bottom: 20px;
            color: #555;
        }
    </style>
    """
    )
    # Page heading and usage instructions.
    gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool")
    gr.Markdown("Upload an audio file to interact with the voice-to-voice chatbot. The chatbot will transcribe the audio, generate a response, and provide a spoken reply.")
    with gr.Row():
        with gr.Column():
            # type="filepath" makes Gradio hand process_audio a path string.
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            submit_button = gr.Button("Submit")
        with gr.Column():
            # Outputs: the LLM's text reply and the synthesized MP3 path.
            response_text = gr.Textbox(label="Response Text", placeholder="Generated response will appear here")
            response_audio = gr.Audio(label="Response Audio", type="filepath")
    # Wire the button: process_audio returns (text, audio_path) matching outputs.
    submit_button.click(process_audio, inputs=audio_input, outputs=[response_text, response_audio])
# Launch the Gradio app
demo.launch()