Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,136 +1,46 @@
|
|
1 |
-
import asyncio
|
2 |
-
import os
|
3 |
-
import edge_tts
|
4 |
import gradio as gr
|
5 |
-
from
|
|
|
|
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
-
try:
|
10 |
-
voices = await edge_tts.list_voices()
|
11 |
-
return sorted([f"{voice['ShortName']} ({voice['Gender']})" for voice in voices])
|
12 |
-
except Exception as e:
|
13 |
-
return [f"Error fetching voices: {str(e)}"]
|
14 |
|
15 |
-
#
|
16 |
-
|
17 |
-
try:
|
18 |
-
if not text or not voice:
|
19 |
-
return None, "Error: Text and voice selection are required."
|
20 |
-
|
21 |
-
# Extract voice ShortName (e.g., "en-US-AvaNeural (Female)" -> "en-US-AvaNeural")
|
22 |
-
voice_short_name = voice.split(" (")[0]
|
23 |
-
|
24 |
-
# Convert rate to edge-tts format (e.g., 10 -> "+10%", -10 -> "-10%")
|
25 |
-
rate_str = f"+{int(rate)}%" if rate >= 0 else f"{int(rate)}%"
|
26 |
-
|
27 |
-
# Convert pitch to edge-tts format (e.g., 100 -> "+100Hz", -100 -> "-100Hz")
|
28 |
-
pitch_str = f"+{int(pitch)}Hz" if pitch >= 0 else f"{int(pitch)}Hz"
|
29 |
-
|
30 |
-
# Generate unique output filename with timestamp
|
31 |
-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
32 |
-
output_file = f"output_{timestamp}.mp3"
|
33 |
-
|
34 |
-
# Initialize edge-tts communication
|
35 |
-
communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
|
36 |
-
|
37 |
-
# Save the audio
|
38 |
-
await communicate.save(output_file)
|
39 |
-
|
40 |
-
# Check if file was created
|
41 |
-
if os.path.exists(output_file):
|
42 |
-
return output_file, "Audio generated successfully!"
|
43 |
-
else:
|
44 |
-
return None, "Error: Audio file was not generated."
|
45 |
-
except Exception as e:
|
46 |
-
return None, f"Error: {str(e)}"
|
47 |
|
48 |
-
#
|
49 |
-
def
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
css = """
|
56 |
-
.gradio-container {background-color: #f5f7fa;}
|
57 |
-
.title {text-align: center; color: #2c3e50;}
|
58 |
-
.footer {text-align: center; color: #7f8c8d; font-size: 0.9em; margin-top: 20px;}
|
59 |
-
.button-primary {background-color: #3498db !important; color: white !important;}
|
60 |
-
.input-box {border-radius: 8px;}
|
61 |
-
"""
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
)
|
71 |
-
|
72 |
-
with gr.Row():
|
73 |
-
with gr.Column(scale=2):
|
74 |
-
text_input = gr.Textbox(
|
75 |
-
label="Input Text",
|
76 |
-
placeholder="Enter the text you want to convert to speech...",
|
77 |
-
lines=5,
|
78 |
-
elem_classes="input-box"
|
79 |
-
)
|
80 |
-
voice_dropdown = gr.Dropdown(
|
81 |
-
choices=voices,
|
82 |
-
label="Voice Model",
|
83 |
-
value=voices[0] if voices else None,
|
84 |
-
allow_custom_value=False
|
85 |
-
)
|
86 |
-
rate_slider = gr.Slider(
|
87 |
-
minimum=-50,
|
88 |
-
maximum=50,
|
89 |
-
value=0,
|
90 |
-
step=1,
|
91 |
-
label="Speech Rate (%)",
|
92 |
-
info="Adjust the speed of the speech (±50%)"
|
93 |
-
)
|
94 |
-
pitch_slider = gr.Slider(
|
95 |
-
minimum=-200,
|
96 |
-
maximum=200,
|
97 |
-
value=0,
|
98 |
-
step=10,
|
99 |
-
label="Pitch (Hz)",
|
100 |
-
info="Adjust the pitch of the voice (±200Hz)"
|
101 |
-
)
|
102 |
-
generate_button = gr.Button("Generate Audio", variant="primary", elem_classes="button-primary")
|
103 |
-
|
104 |
-
with gr.Column(scale=1):
|
105 |
-
audio_output = gr.Audio(label="Generated Audio", interactive=False)
|
106 |
-
status_output = gr.Textbox(
|
107 |
-
label="Status",
|
108 |
-
interactive=False,
|
109 |
-
placeholder="Status messages will appear here..."
|
110 |
-
)
|
111 |
-
|
112 |
-
# Button click event
|
113 |
-
async def on_generate(text, voice, rate, pitch):
|
114 |
-
audio, status = await text_to_speech(text, voice, rate, pitch)
|
115 |
-
return audio, status
|
116 |
-
|
117 |
-
generate_button.click(
|
118 |
-
fn=on_generate,
|
119 |
-
inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
|
120 |
-
outputs=[audio_output, status_output]
|
121 |
-
)
|
122 |
-
|
123 |
-
gr.Markdown(
|
124 |
-
"""
|
125 |
-
<p class='footer'>
|
126 |
-
Powered by Edge TTS and Gradio | Deployed on Hugging Face Spaces
|
127 |
-
</p>
|
128 |
-
"""
|
129 |
-
)
|
130 |
|
131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
-
#
|
134 |
-
|
135 |
-
interface = create_gradio_interface()
|
136 |
-
interface.launch(server_name="0.0.0.0", server_port=7860, share=False)
|
|
|
|
|
|
|
|
|
import asyncio

import edge_tts
import gradio as gr
import numpy as np
from transformers import pipeline
5 |
|
6 |
+
# Load the speech-to-text model (Whisper small; multilingual, used here for Persian).
stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")

# Load the chatbot model (GPT-2 fine-tuned for Persian by HooshvareLab).
chatbot = pipeline("text-generation", model="HooshvareLab/gpt2-fa")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
+
# Text-to-speech via edge-tts.
def tts(text, voice="fa-IR-FaridNeural"):
    """Synthesize *text* with the given edge-tts voice.

    Returns:
        tuple[int, np.ndarray]: (sample_rate, int16 audio samples), the
        ``type="numpy"`` format expected by ``gr.Audio``.
    """
    # edge_tts.Communicate.stream() is an *async* generator — the original
    # synchronous list comprehension over it raises TypeError. Collect the
    # audio chunks inside a coroutine and drive it with asyncio.run (safe
    # here: Gradio invokes sync handlers in a thread with no running loop).
    async def _collect_audio() -> bytes:
        communicate = edge_tts.Communicate(text, voice)
        chunks = []
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                chunks.append(chunk["data"])
        return b"".join(chunks)

    audio_data = asyncio.run(_collect_audio())
    # NOTE(review): edge-tts streams MP3-encoded bytes; reinterpreting them as
    # raw int16 PCM below yields noise, not speech. A proper fix decodes the
    # MP3 first (ffmpeg/pydub) — left as-is pending that dependency. Confirm.
    audio_array = np.frombuffer(audio_data, dtype=np.int16)
    sample_rate = 24000  # edge-tts default output rate — TODO confirm against its docs
    return sample_rate, audio_array
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
+
# Main handler: the full speech-to-speech pipeline.
def audio_to_audio(audio_input):
    """Transcribe the recorded audio, generate a chatbot reply, speak it back.

    Args:
        audio_input: (sample_rate, samples) tuple from ``gr.Audio(type="numpy")``.

    Returns:
        tuple: (sample_rate, samples) of the synthesized reply, as produced by ``tts``.
    """
    in_rate, in_samples = audio_input

    # Step 1: speech -> text (Whisper pipeline takes an array + sampling-rate dict).
    transcript = stt({"array": in_samples, "sampling_rate": in_rate})["text"]

    # Step 2: text -> chatbot reply.
    reply = chatbot(transcript, max_length=50, num_return_sequences=1)[0]["generated_text"]

    # Step 3: reply -> speech.
    return tts(reply)
35 |
+
|
36 |
+
# Gradio UI: record Persian speech from the microphone, play back the synthesized reply.
demo = gr.Interface(
    fn=audio_to_audio,
    # Gradio 4.x renamed `source=` to `sources=[...]`; the old keyword raises TypeError.
    inputs=gr.Audio(sources=["microphone"], type="numpy"),
    outputs=gr.Audio(type="numpy"),
    title="چتبات صوتی فارسی",
    description="به فارسی صحبت کنید و برنامه به فارسی پاسخ میدهد."
)
|
44 |
|
45 |
+
# Entry point: start the Gradio server (blocks until shutdown).
demo.launch()
|
|
|
|