Spaces:
Sleeping
Sleeping
File size: 1,839 Bytes
25681f6 7fc6e29 7031ffc 1b41e6d 6d598f3 7fc6e29 25681f6 6d598f3 7fc6e29 25681f6 6d598f3 7fc6e29 6d598f3 7fc6e29 6d598f3 7fc6e29 37a2817 6d598f3 7031ffc e4097c6 7fc6e29 6d598f3 7fc6e29 7877a4f 6d598f3 7fc6e29 6d598f3 7031ffc 7fc6e29 6d598f3 7fc6e29 e4097c6 6d598f3 e4097c6 7fc6e29 1b41e6d 6d598f3 7fc6e29 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import gradio as gr
from transformers import pipeline
import edge_tts
import numpy as np
import asyncio
# Log the installed Gradio version; useful when debugging API differences.
print(f"Gradio version: {gr.__version__}")

# Speech-to-text stage: the Whisper "small" checkpoint transcribes the
# recorded (Farsi) speech.
stt = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
)

# Reply-generation stage: a Farsi GPT-2 checkpoint (per its model id)
# produces the text that is later spoken back.
chatbot = pipeline(
    task="text-generation",
    model="HooshvareLab/gpt2-fa",
)
async def tts(text, voice="fa-IR-FaridNeural"):
    """Synthesize *text* with edge-tts and return it as PCM samples.

    Args:
        text: The text to speak.
        voice: edge-tts voice name; defaults to the Farsi voice
            "fa-IR-FaridNeural".

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a 1-D
        ``np.int16`` array. An empty array is returned when the service
        yields no audio chunks.

    Raises:
        ValueError: Propagated from ``ffmpeg_read`` when ffmpeg is not
            installed or the received payload cannot be decoded.
    """
    # Imported locally so the decoder dependency lives next to its only use;
    # transformers is already a dependency of this file.
    from transformers.pipelines.audio_utils import ffmpeg_read

    sample_rate = 24000  # edge-tts' default output sample rate

    communicate = edge_tts.Communicate(text, voice)
    # Collect the audio chunks and join once instead of repeated bytes "+=".
    encoded = b"".join(
        [
            chunk["data"]
            async for chunk in communicate.stream()
            if chunk["type"] == "audio"
        ]
    )
    if not encoded:
        return sample_rate, np.zeros(0, dtype=np.int16)

    # edge-tts streams a compressed MP3 payload, not raw PCM, so the bytes
    # must be decoded; reinterpreting them with np.frombuffer yields noise
    # (or a ValueError when the byte count is odd).
    decoded = ffmpeg_read(encoded, sample_rate)  # mono float32 waveform

    # Convert back to 16-bit PCM so the return type matches what callers of
    # this function have always received.
    pcm = (np.clip(decoded, -1.0, 1.0) * 32767).astype(np.int16)
    return sample_rate, pcm
def _to_mono_float32(samples):
    """Return *samples* as a mono float32 waveform peak-normalised to [-1, 1]."""
    wave = np.asarray(samples)
    if wave.ndim > 1:
        # assumes a (samples, channels) layout, as Gradio documents for
        # multi-channel recordings; down-mix by averaging the channels.
        wave = wave.mean(axis=1)
    wave = wave.astype(np.float32)
    peak = float(np.max(np.abs(wave))) if wave.size else 0.0
    if peak > 0.0:  # skip all-silent or empty input to avoid dividing by zero
        wave = wave / peak
    return wave


async def audio_to_audio(audio_input):
    """Run the speech -> text -> reply -> speech pipeline for one recording.

    Args:
        audio_input: ``(sample_rate, samples)`` tuple as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was recorded.

    Returns:
        ``(sample_rate, samples)`` for the spoken reply, or ``None`` when
        there is no input audio or nothing could be transcribed.
    """
    if audio_input is None:
        # The interface has a single audio output, so return a single value;
        # a 2-tuple here would be misread as (sample_rate, data).
        return None

    sample_rate_in, data_in = audio_input
    # The ASR pipeline expects a 1-D float waveform, while the microphone
    # component delivers integer (possibly multi-channel) samples.
    audio = {
        "array": _to_mono_float32(data_in),
        "sampling_rate": sample_rate_in,
    }

    # Step 1: speech to text.
    text = stt(audio)["text"]
    if not text.strip():
        # Nothing was recognised; do not prompt the generator with "".
        return None

    # Step 2: generate the reply text.
    # NOTE(review): "generated_text" normally includes the prompt, and
    # max_length counts prompt tokens too -- confirm this is intended.
    response = chatbot(text, max_length=50, num_return_sequences=1)[0]["generated_text"]

    # Step 3: text to speech.
    sample_rate_out, data_out = await tts(response)
    return (sample_rate_out, data_out)
# Gradio interface.
# Gradio 4 renamed gr.Audio's `source="microphone"` keyword to
# `sources=["microphone"]`; choose the spelling the installed major version
# accepts so the app starts on either release line.
if int(gr.__version__.split(".")[0]) >= 4:
    _mic_kwargs = {"sources": ["microphone"]}
else:
    _mic_kwargs = {"source": "microphone"}

demo = gr.Interface(
    fn=audio_to_audio,
    inputs=gr.Audio(type="numpy", label="Speak in Farsi", **_mic_kwargs),
    outputs=gr.Audio(type="numpy", label="Response in Farsi"),
    title="Farsi Audio Chatbot",
    description="Speak in Farsi, and the app will respond in Farsi audio.",
)

# Launch the app.
demo.launch()