File size: 1,707 Bytes
25681f6
7fc6e29
 
 
7031ffc
cf014fb
7031ffc
cf014fb
7fc6e29
 
25681f6
cf014fb
7fc6e29
6d598f3
 
 
 
7fc6e29
cf014fb
7fc6e29
37a2817
7031ffc
e4097c6
 
7fc6e29
 
cf014fb
7fc6e29
cf014fb
7fc6e29
cf014fb
 
7fc6e29
6d598f3
7fc6e29
 
cf014fb
 
 
 
 
e4097c6
6d598f3
cf014fb
 
7fc6e29
1b41e6d
cf014fb
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import gradio as gr
from transformers import pipeline
import edge_tts
import numpy as np
import asyncio
import os

# Build both Hugging Face pipelines once at import time so every request
# reuses them: `stt` transcribes speech, `chatbot` generates text.
stt = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
)
chatbot = pipeline(
    task="text-generation",
    model="HooshvareLab/gpt2-fa",
)

async def tts(text: str, voice: str = "fa-IR-FaridNeural"):
    communicate = edge_tts.Communicate(text, voice)
    audio_data = b""
    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio_data += chunk["data"]
    audio_array = np.frombuffer(audio_data, dtype=np.int16)
    sample_rate = 24000
    return sample_rate, audio_array

def _to_mono_float32(samples):
    """Convert raw PCM samples to a mono float32 waveform scaled to [-1, 1]."""
    waveform = np.asarray(samples)
    if waveform.ndim > 1:
        # Multi-channel input is laid out as (samples, channels); average it.
        waveform = waveform.mean(axis=1)
    waveform = waveform.astype(np.float32)
    peak = float(np.max(np.abs(waveform))) if waveform.size else 0.0
    if peak > 0:
        waveform /= peak
    return waveform


async def audio_to_audio(audio_input):
    """Run one speech -> text -> generated reply -> speech round trip.

    Args:
        audio_input: ``(sample_rate, samples)`` as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was recorded.

    Returns:
        The ``(sample_rate, samples)`` tuple returned by ``tts`` for the
        generated reply, or ``None`` (no audio) when there is no input, the
        recording is empty, nothing was transcribed, or the model produced no
        text.
    """
    # The interface has a single audio output, so "nothing to play" must be a
    # bare None; a 2-tuple would be interpreted as (sample_rate, samples).
    if audio_input is None:
        return None

    sample_rate_in, data_in = audio_input
    # The ASR pipeline needs a single-channel float waveform, not integer PCM.
    waveform = _to_mono_float32(data_in)
    if waveform.size == 0:
        return None

    # 1. ASR -> text
    audio = {"array": waveform, "sampling_rate": sample_rate_in}
    text = stt(audio)["text"].strip()
    if not text:
        return None

    # 2. Generate response. max_new_tokens bounds only the reply (max_length
    #    also counts the prompt, so long transcripts left no room to generate);
    #    return_full_text=False keeps the user's own words out of the reply.
    generated = chatbot(
        text,
        max_new_tokens=50,
        num_return_sequences=1,
        return_full_text=False,
    )
    response = generated[0]["generated_text"].strip()
    if not response:
        return None

    # 3. TTS
    return await tts(response)

# Gradio UI: a microphone recorder feeding audio_to_audio, and a player for
# the synthesized reply.
_microphone = gr.Audio(
    label="Speak in Farsi",
    type="numpy",
    sources=["microphone"],  # 'sources' replaces the deprecated 'source' argument
)
_speaker = gr.Audio(label="Response in Farsi", type="numpy")

demo = gr.Interface(
    fn=audio_to_audio,
    inputs=_microphone,
    outputs=_speaker,
    title="Farsi Audio Chatbot",
    description="Speak in Farsi, and the app will respond in Farsi audio.",
    allow_flagging="never",
)

if __name__ == "__main__":
    # Listen on all interfaces; use the PORT environment variable when it is
    # set and fall back to 7860 otherwise.
    _port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=_port)