# (removed: Hugging Face Spaces UI artifacts — "Spaces:" / "Running" status
# text accidentally pasted above the source; not part of the program)
import functools
import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import torch
from gtts import gTTS
from openai import OpenAI
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# OpenRouter API setup.
# Fix: the original had a stray trailing quote after os.getenv(...), a syntax
# error, and `os` was never imported at the top of the file.
# NOTE(review): os.getenv returns None when OPENROUTER_API_KEY is unset — the
# client will then fail at request time, not here; confirm the env var is set.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),  # your OpenRouter API key
)
@functools.lru_cache(maxsize=1)
def _load_whisper():
    """Load and cache the Whisper processor and model (single shared pair).

    The original code re-downloaded/re-instantiated the ~1.5 GB
    whisper-large-v3-turbo model on EVERY request; caching makes each call
    after the first fast.
    """
    processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3-turbo")
    model = WhisperForConditionalGeneration.from_pretrained(
        "openai/whisper-large-v3-turbo"
    )
    return processor, model


def voice_assistant(audio_filepath):
    """Transcribe a spoken question, answer it via OpenRouter, and speak the reply.

    Parameters
    ----------
    audio_filepath : str | None
        Path to the recorded clip supplied by Gradio; None if nothing was
        recorded.

    Returns
    -------
    tuple[str, str | None]
        (AI text response, path to an MP3 with the spoken response) — or a
        prompt string and None when no audio was provided.
    """
    if audio_filepath is None:
        return "Please record your question.", None

    processor, model = _load_whisper()

    # Resample to 16 kHz, the rate Whisper expects.
    audio_data, sample_rate = librosa.load(audio_filepath, sr=16000)

    # Speech -> text.
    input_features = processor(
        audio_data, sampling_rate=sample_rate, return_tensors="pt"
    ).input_features
    predicted_ids = model.generate(input_features)
    user_voice = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

    # Text -> answer via OpenRouter.
    completion = client.chat.completions.create(
        model="qwen/qwen2.5-vl-32b-instruct:free",
        messages=[{"role": "user", "content": user_voice}],
    )
    ai_response = completion.choices[0].message.content

    # Answer -> speech. Close the handle BEFORE gTTS writes to the path:
    # writing to a still-open NamedTemporaryFile fails on Windows.
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    temp_audio.close()
    gTTS(ai_response, lang="en").save(temp_audio.name)

    return ai_response, temp_audio.name  # text + audio path for Gradio
# Gradio UI: microphone in -> (text answer, spoken answer) out.
iface = gr.Interface(
    fn=voice_assistant,
    inputs=gr.Audio(
        sources=["microphone"], type="filepath", label="Speak Your Question"
    ),
    outputs=[gr.Textbox(label="AI Response"), gr.Audio(label="Voice Response")],
    title="AI Voice Assistant",
    # Fix: the old description promised "Speak or type", but the interface
    # only offers a microphone input — don't advertise typing.
    description="Speak a question, and the AI will respond with voice output.",
    # NOTE(review): live=True re-runs the expensive transcribe/LLM/TTS
    # pipeline on every input change; consider dropping it so the app only
    # runs on explicit submit.
    live=True,
)

iface.launch()