# AIVoice / app.py
import os

import gradio as gr
import librosa
import torch
from gtts import gTTS
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline

# Load the Wav2Vec2 model and processor for speech-to-text (expects 16 kHz mono audio)
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")

# Load the text-generation pipeline once at startup instead of on every request
conversational_pipeline = pipeline("text-generation", model="microsoft/DialoGPT-medium")
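# Optional sketch (not in the original app): move ASR inference to GPU when one
# is available; input_values inside speech_to_text would also need .to(device).
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model.to(device)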
def speech_to_text(audio_file):
    # Load the audio file, resampling to the 16 kHz rate Wav2Vec 2.0 expects
    audio_input, _ = librosa.load(audio_file, sr=16000)
    input_values = processor(
        audio_input, sampling_rate=16000, return_tensors="pt"
    ).input_values

    # Run inference to get per-frame character logits
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decoding: pick the most likely token per frame, then collapse
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.decode(predicted_ids[0])
    return transcription
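# Example usage (sketch, assuming a local recording named "sample.wav" exists):
#   print(speech_to_text("sample.wav"))
# This checkpoint emits uppercase, unpunctuated text, e.g. "HELLO THERE".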
def generate_response(text):
    # Generate a reply with the DialoGPT pipeline loaded above.
    # return_full_text=False drops the prompt so only the new reply comes back;
    # pad_token_id silences the "no pad token" warning for GPT-2-style models.
    response = conversational_pipeline(
        text,
        max_length=50,
        return_full_text=False,
        pad_token_id=conversational_pipeline.tokenizer.eos_token_id,
    )
    return response[0]["generated_text"]
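# Example usage (sketch; actual output varies from run to run):
#   generate_response("Hello, how are you?")
#   -> a short conversational reply such as "I'm good, how about you?"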
def process_audio(audio_file):
    # Convert speech to text using Wav2Vec 2.0
    text = speech_to_text(audio_file)
    print(f"User said: {text}")

    # Get the bot's text response
    bot_response = generate_response(text)
    print(f"Bot response: {bot_response}")

    # Convert the bot's response to speech. The saved file is returned to the
    # Gradio audio component, which plays it in the browser, so no server-side
    # player (e.g. mpg321) is needed.
    tts = gTTS(bot_response)
    tts.save("response.mp3")
    return bot_response, "response.mp3"
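# Optional end-to-end smoke test (sketch, assuming "test_input.wav" exists):
#   reply, audio_path = process_audio("test_input.wav")
#   print(reply, audio_path)  # e.g. a short reply and "response.mp3"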
# Create the Gradio interface for audio input/output.
# gr.inputs / gr.outputs were removed in recent Gradio versions; components are
# used directly, and type="filepath" hands process_audio a path to the recording.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=[gr.Textbox(label="Bot response"), gr.Audio(type="filepath")],
    live=True,
    title="Voice Bot with Wav2Vec 2.0",
    description="Speak to the bot and get a response!",
)

# Launch the interface
iface.launch()