Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,10 @@ import gradio as gr
|
|
2 |
from transformers import pipeline
|
3 |
import edge_tts
|
4 |
import numpy as np
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# Load speech-to-text model (Whisper small for Farsi)
|
7 |
stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")
|
@@ -21,7 +25,7 @@ async def tts(text, voice="fa-IR-FaridNeural"):
|
|
21 |
return sample_rate, audio_array
|
22 |
|
23 |
# Main function: Audio-to-audio pipeline
|
24 |
-
def audio_to_audio(audio_input):
|
25 |
sample_rate_in, data_in = audio_input
|
26 |
audio = {"array": data_in, "sampling_rate": sample_rate_in}
|
27 |
|
@@ -32,7 +36,7 @@ def audio_to_audio(audio_input):
|
|
32 |
response = chatbot(text, max_length=50, num_return_sequences=1)[0]["generated_text"]
|
33 |
|
34 |
# Step 3: Convert text to speech
|
35 |
-
sample_rate_out, data_out = tts(response)
|
36 |
|
37 |
return (sample_rate_out, data_out)
|
38 |
|
|
|
2 |
from transformers import pipeline
|
3 |
import edge_tts
|
4 |
import numpy as np
|
5 |
+
import asyncio
|
6 |
+
|
7 |
+
# Print Gradio version for debugging
|
8 |
+
print(f"Gradio version: {gr.__version__}")
|
9 |
|
10 |
# Load speech-to-text model (Whisper small for Farsi)
|
11 |
stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")
|
|
|
25 |
return sample_rate, audio_array
|
26 |
|
27 |
# Main function: Audio-to-audio pipeline
|
28 |
+
async def audio_to_audio(audio_input):
|
29 |
sample_rate_in, data_in = audio_input
|
30 |
audio = {"array": data_in, "sampling_rate": sample_rate_in}
|
31 |
|
|
|
36 |
response = chatbot(text, max_length=50, num_return_sequences=1)[0]["generated_text"]
|
37 |
|
38 |
# Step 3: Convert text to speech
|
39 |
+
sample_rate_out, data_out = await tts(response)
|
40 |
|
41 |
return (sample_rate_out, data_out)
|
42 |
|