# (removed: Hugging Face Spaces UI artifacts — "Spaces:" / "Running" status
# text accidentally pasted above the source; not part of the program)
import functools
import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import torch
from gtts import gTTS
from openai import OpenAI
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# OpenRouter API setup.
# Fix: the original had a stray trailing quote after os.getenv(...), a syntax
# error, and `os` was never imported at the top of the file.
# NOTE(review): os.getenv returns None when OPENROUTER_API_KEY is unset — the
# client will then fail at request time, not here; confirm the env var is set.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),  # your OpenRouter API key
)
@functools.lru_cache(maxsize=1)
def _load_whisper():
    """Load and cache the Whisper processor and model (single shared pair).

    The original code re-downloaded/re-instantiated the ~1.5 GB
    whisper-large-v3-turbo model on EVERY request; caching makes each call
    after the first fast.
    """
    processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3-turbo")
    model = WhisperForConditionalGeneration.from_pretrained(
        "openai/whisper-large-v3-turbo"
    )
    return processor, model


def voice_assistant(audio_filepath):
    """Transcribe a spoken question, answer it via OpenRouter, and speak the reply.

    Parameters
    ----------
    audio_filepath : str | None
        Path to the recorded clip supplied by Gradio; None if nothing was
        recorded.

    Returns
    -------
    tuple[str, str | None]
        (AI text response, path to an MP3 with the spoken response) — or a
        prompt string and None when no audio was provided.
    """
    if audio_filepath is None:
        return "Please record your question.", None

    processor, model = _load_whisper()

    # Resample to 16 kHz, the rate Whisper expects.
    audio_data, sample_rate = librosa.load(audio_filepath, sr=16000)

    # Speech -> text.
    input_features = processor(
        audio_data, sampling_rate=sample_rate, return_tensors="pt"
    ).input_features
    predicted_ids = model.generate(input_features)
    user_voice = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

    # Text -> answer via OpenRouter.
    completion = client.chat.completions.create(
        model="qwen/qwen2.5-vl-32b-instruct:free",
        messages=[{"role": "user", "content": user_voice}],
    )
    ai_response = completion.choices[0].message.content

    # Answer -> speech. Close the handle BEFORE gTTS writes to the path:
    # writing to a still-open NamedTemporaryFile fails on Windows.
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    temp_audio.close()
    gTTS(ai_response, lang="en").save(temp_audio.name)

    return ai_response, temp_audio.name  # text + audio path for Gradio
# Gradio UI: microphone in -> (text answer, spoken answer) out.
iface = gr.Interface(
    fn=voice_assistant,
    inputs=gr.Audio(
        sources=["microphone"], type="filepath", label="Speak Your Question"
    ),
    outputs=[gr.Textbox(label="AI Response"), gr.Audio(label="Voice Response")],
    title="AI Voice Assistant",
    # Fix: the old description promised "Speak or type", but the interface
    # only offers a microphone input — don't advertise typing.
    description="Speak a question, and the AI will respond with voice output.",
    # NOTE(review): live=True re-runs the expensive transcribe/LLM/TTS
    # pipeline on every input change; consider dropping it so the app only
    # runs on explicit submit.
    live=True,
)

iface.launch()