EmRa228 committed on
Commit
6d598f3
·
verified ·
1 Parent(s): dd4fe56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -14
app.py CHANGED
@@ -3,44 +3,47 @@ from transformers import pipeline
3
  import edge_tts
4
  import numpy as np
5
 
6
# Load the speech-to-text model (Whisper small, used here for Farsi audio).
stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")

# Load the chatbot model (GPT-2 fine-tuned for Farsi).
chatbot = pipeline("text-generation", model="HooshvareLab/gpt2-fa")
11
 
12
# Convert text to speech using edge-tts.
def tts(text, voice="fa-IR-FaridNeural"):
    """Synthesize `text` with the given edge-tts voice.

    Parameters
    ----------
    text : str
        Text to synthesize.
    voice : str
        edge-tts voice name (default: a Farsi voice).

    Returns
    -------
    tuple(int, np.ndarray)
        (sample_rate, audio samples) suitable for gr.Audio(type="numpy").
    """
    import asyncio  # local import: only needed to drive the async stream

    communicate = edge_tts.Communicate(text, voice)

    async def _collect() -> bytes:
        # BUG FIX: Communicate.stream() is an *async* generator; iterating
        # it with a plain (sync) list comprehension raises TypeError.
        # Gather the audio chunks inside a coroutine instead.
        data = b""
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                data += chunk["data"]
        return data

    audio_data = asyncio.run(_collect())

    # NOTE(review): edge-tts streams MP3-encoded audio by default, so
    # interpreting the raw bytes as int16 PCM is questionable — confirm
    # the actual stream format before relying on this.
    audio_array = np.frombuffer(audio_data, dtype=np.int16)
    sample_rate = 24000  # per edge-tts documentation
    return sample_rate, audio_array
19
 
20
# End-to-end pipeline: spoken input -> transcript -> chatbot reply -> spoken output.
def audio_to_audio(audio_input):
    """Run the full speech-to-speech loop on a (sample_rate, samples) tuple."""
    rate, samples = audio_input

    # Step 1: transcribe the incoming audio.
    transcript = stt({"array": samples, "sampling_rate": rate})["text"]

    # Step 2: generate a reply with the chatbot.
    generations = chatbot(transcript, max_length=50, num_return_sequences=1)
    reply = generations[0]["generated_text"]

    # Step 3: synthesize the reply; tts() already returns (sample_rate, samples).
    return tts(reply)
35
 
36
# Gradio UI: record speech from the microphone, play back the synthesized reply.
demo = gr.Interface(
    fn=audio_to_audio,
    # NOTE(review): `source=` was renamed to `sources=[...]` in Gradio 4.x —
    # confirm the installed Gradio version still accepts this keyword.
    inputs=gr.Audio(source="microphone", type="numpy"),
    outputs=gr.Audio(type="numpy"),
    title="چت‌بات صوتی فارسی",
    description="به فارسی صحبت کنید و برنامه به فارسی پاسخ می‌دهد."
)

# Launch the app.
demo.launch()
 
3
  import edge_tts
4
  import numpy as np
5
 
6
# Load the speech-to-text model (Whisper small, used here for Farsi audio).
stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")

# Load the chatbot model (GPT-2 fine-tuned for Farsi).
chatbot = pipeline("text-generation", model="HooshvareLab/gpt2-fa")
11
 
12
# Text-to-speech via edge-tts.
async def tts(text, voice="fa-IR-FaridNeural"):
    """Stream `text` through edge-tts and return (sample_rate, audio_array)."""
    communicate = edge_tts.Communicate(text, voice)

    # Gather every audio chunk from the async stream in one pass.
    audio_data = b"".join(
        [chunk["data"] async for chunk in communicate.stream() if chunk["type"] == "audio"]
    )

    # NOTE(review): edge-tts emits MP3 by default; reading the raw bytes as
    # int16 PCM may be incorrect — confirm the stream's actual format.
    audio_array = np.frombuffer(audio_data, dtype=np.int16)
    sample_rate = 24000  # per edge-tts documentation
    return sample_rate, audio_array
22
 
23
# Main function: audio-to-audio pipeline.
def audio_to_audio(audio_input):
    """Transcribe the user's speech, generate a chatbot reply, and speak it back.

    Parameters
    ----------
    audio_input : tuple(int, np.ndarray)
        (sample_rate, samples) as delivered by gr.Audio(type="numpy").

    Returns
    -------
    tuple(int, np.ndarray)
        (sample_rate, samples) of the synthesized reply.
    """
    import asyncio  # local import: only needed to drive the async tts()

    sample_rate_in, data_in = audio_input
    audio = {"array": data_in, "sampling_rate": sample_rate_in}

    # Step 1: convert speech to text.
    text = stt(audio)["text"]

    # Step 2: generate the chatbot response.
    response = chatbot(text, max_length=50, num_return_sequences=1)[0]["generated_text"]

    # Step 3: convert text to speech.
    # BUG FIX: tts() is an async coroutine function; calling it without
    # awaiting returned a coroutine object, and unpacking that raised a
    # TypeError. Drive the coroutine to completion with asyncio.run().
    sample_rate_out, data_out = asyncio.run(tts(response))

    return (sample_rate_out, data_out)
38
 
39
# Gradio interface: record speech from the microphone, play back the reply.
demo = gr.Interface(
    fn=audio_to_audio,
    # NOTE(review): `source=` was renamed to `sources=[...]` in Gradio 4.x —
    # confirm the installed Gradio version still accepts this keyword.
    inputs=gr.Audio(source="microphone", type="numpy"),
    outputs=gr.Audio(type="numpy"),
    title="Farsi Audio Chatbot",
    description="Speak in Farsi, and the app will respond in Farsi."
)

# Launch the app.
demo.launch()