import os
import tempfile
import numpy as np
import gradio as gr
import whisper
from gtts import gTTS
from groq import Groq
import soundfile as sf

# Set up Groq API key
os.environ['GROQ_API_KEY'] = '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'
groq_client = Groq(api_key=os.environ.get('GROQ_API_KEY'))
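# Note: a key hard-coded in the source is publicly visible; on Hugging Face Spaces it is safer
# to store GROQ_API_KEY as a repository secret, which is exposed to the app as an environment variable.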

# Load Whisper model
whisper_model = whisper.load_model("base")
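# Note: openai-whisper decodes audio through ffmpeg, so ffmpeg must be available on the host
# (on Spaces, e.g. via packages.txt); "base" is a small model chosen for speed over accuracy.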

def process_audio(audio_file_path):
    try:
        # Ensure audio_file_path is valid
        if not audio_file_path:
            raise ValueError("No audio file provided")

        print(f"Received audio file path: {audio_file_path}")

        # Read the audio file from the file path
        with open(audio_file_path, 'rb') as f:
            audio_data = f.read()

        # Save the audio data to a temporary file; the with block closes it automatically
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio_file:
            temp_audio_path = temp_audio_file.name
            temp_audio_file.write(audio_data)

        # Transcribe audio using Whisper
        result = whisper_model.transcribe(temp_audio_path)
        user_text = result['text']
        print(f"Transcribed text: {user_text}")

        # Generate a response with the Llama 3 8B model via the Groq API
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": user_text,
                }
            ],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content
        print(f"Response text: {response_text}")

        # Convert response text to speech using gTTS
        tts = gTTS(text=response_text, lang='en')
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio_file:
            response_audio_path = temp_audio_file.name
        # Save the MP3 after the with block has closed the temporary file handle
        tts.save(response_audio_path)

        return response_text, response_audio_path

    except Exception as e:
        # Surface the error message in the text output and return no audio
        return f"Error: {str(e)}", None

# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(
        """
        <style>
        .gradio-container {
            font-family: Arial, sans-serif;
            background-color: #e0f7fa;  /* light cyan background */
            border-radius: 10px;
            padding: 20px;
            box-shadow: 0 4px 12px rgba(0,0,0,0.2);
        }
        .gradio-input, .gradio-output {
            border-radius: 6px;
            border: 1px solid #ddd;
            padding: 10px;
        }
        .gradio-button {
            background-color: #28a745;
            color: white;
            border-radius: 6px;
            border: none;
            padding: 8px 16px;
            font-size: 16px;
        }
        .gradio-button:hover {
            background-color: #218838;
        }
        .gradio-title {
            font-size: 24px;
            font-weight: bold;
            margin-bottom: 20px;
        }
        .gradio-description {
            font-size: 14px;
            margin-bottom: 20px;
            color: #555;
        }
        </style>
        """
    )
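    # Note: some Gradio versions sanitize raw <style> tags inside Markdown, so this styling
    # may not be applied; passing the same CSS via gr.Blocks(css=...) is a more reliable option.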
gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool") | |
gr.Markdown("Upload an audio file to interact with the voice-to-voice chatbot. The chatbot will transcribe the audio, generate a response, and provide a spoken reply.") | |
with gr.Row(): | |
with gr.Column(): | |
audio_input = gr.Audio(type="filepath", label="Upload Audio File") | |
submit_button = gr.Button("Submit") | |
with gr.Column(): | |
response_text = gr.Textbox(label="Response Text", placeholder="Generated response will appear here") | |
response_audio = gr.Audio(label="Response Audio", type="filepath") | |
submit_button.click(process_audio, inputs=audio_input, outputs=[response_text, response_audio]) | |
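
# Optional: demo.launch(debug=True) prints full tracebacks to the console/Space logs,
# which makes runtime errors easier to diagnose.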
# Launch the Gradio app
demo.launch()