EmRa228 committed on
Commit
7fc6e29
·
verified ·
1 Parent(s): 51586a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -129
app.py CHANGED
@@ -1,136 +1,46 @@
1
- import asyncio
2
- import os
3
- import edge_tts
4
  import gradio as gr
5
- from datetime import datetime
 
 
6
 
7
- # Function to get available voices
8
- async def get_voices():
9
- try:
10
- voices = await edge_tts.list_voices()
11
- return sorted([f"{voice['ShortName']} ({voice['Gender']})" for voice in voices])
12
- except Exception as e:
13
- return [f"Error fetching voices: {str(e)}"]
14
 
15
- # Function to convert text to speech
16
- async def text_to_speech(text, voice, rate, pitch):
17
- try:
18
- if not text or not voice:
19
- return None, "Error: Text and voice selection are required."
20
-
21
- # Extract voice ShortName (e.g., "en-US-AvaNeural (Female)" -> "en-US-AvaNeural")
22
- voice_short_name = voice.split(" (")[0]
23
-
24
- # Convert rate to edge-tts format (e.g., 10 -> "+10%", -10 -> "-10%")
25
- rate_str = f"+{int(rate)}%" if rate >= 0 else f"{int(rate)}%"
26
-
27
- # Convert pitch to edge-tts format (e.g., 100 -> "+100Hz", -100 -> "-100Hz")
28
- pitch_str = f"+{int(pitch)}Hz" if pitch >= 0 else f"{int(pitch)}Hz"
29
-
30
- # Generate unique output filename with timestamp
31
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
32
- output_file = f"output_{timestamp}.mp3"
33
-
34
- # Initialize edge-tts communication
35
- communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
36
-
37
- # Save the audio
38
- await communicate.save(output_file)
39
-
40
- # Check if file was created
41
- if os.path.exists(output_file):
42
- return output_file, "Audio generated successfully!"
43
- else:
44
- return None, "Error: Audio file was not generated."
45
- except Exception as e:
46
- return None, f"Error: {str(e)}"
47
 
48
- # Gradio interface function
49
- def create_gradio_interface():
50
- # Get voices synchronously
51
- loop = asyncio.get_event_loop()
52
- voices = loop.run_until_complete(get_voices())
53
-
54
- # Custom CSS for a polished look
55
- css = """
56
- .gradio-container {background-color: #f5f7fa;}
57
- .title {text-align: center; color: #2c3e50;}
58
- .footer {text-align: center; color: #7f8c8d; font-size: 0.9em; margin-top: 20px;}
59
- .button-primary {background-color: #3498db !important; color: white !important;}
60
- .input-box {border-radius: 8px;}
61
- """
62
 
63
- # Define Gradio interface
64
- with gr.Blocks(css=css, theme=gr.themes.Soft()) as interface:
65
- gr.Markdown(
66
- """
67
- <h1 class='title'>Edge TTS Text-to-Speech</h1>
68
- <p style='text-align: center;'>Convert text to speech with customizable voice, rate, and pitch.</p>
69
- """
70
- )
71
-
72
- with gr.Row():
73
- with gr.Column(scale=2):
74
- text_input = gr.Textbox(
75
- label="Input Text",
76
- placeholder="Enter the text you want to convert to speech...",
77
- lines=5,
78
- elem_classes="input-box"
79
- )
80
- voice_dropdown = gr.Dropdown(
81
- choices=voices,
82
- label="Voice Model",
83
- value=voices[0] if voices else None,
84
- allow_custom_value=False
85
- )
86
- rate_slider = gr.Slider(
87
- minimum=-50,
88
- maximum=50,
89
- value=0,
90
- step=1,
91
- label="Speech Rate (%)",
92
- info="Adjust the speed of the speech (±50%)"
93
- )
94
- pitch_slider = gr.Slider(
95
- minimum=-200,
96
- maximum=200,
97
- value=0,
98
- step=10,
99
- label="Pitch (Hz)",
100
- info="Adjust the pitch of the voice (±200Hz)"
101
- )
102
- generate_button = gr.Button("Generate Audio", variant="primary", elem_classes="button-primary")
103
-
104
- with gr.Column(scale=1):
105
- audio_output = gr.Audio(label="Generated Audio", interactive=False)
106
- status_output = gr.Textbox(
107
- label="Status",
108
- interactive=False,
109
- placeholder="Status messages will appear here..."
110
- )
111
-
112
- # Button click event
113
- async def on_generate(text, voice, rate, pitch):
114
- audio, status = await text_to_speech(text, voice, rate, pitch)
115
- return audio, status
116
-
117
- generate_button.click(
118
- fn=on_generate,
119
- inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
120
- outputs=[audio_output, status_output]
121
- )
122
-
123
- gr.Markdown(
124
- """
125
- <p class='footer'>
126
- Powered by Edge TTS and Gradio | Deployed on Hugging Face Spaces
127
- </p>
128
- """
129
- )
130
 
131
- return interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
- # Launch the interface
134
- if __name__ == "__main__":
135
- interface = create_gradio_interface()
136
- interface.launch(server_name="0.0.0.0", server_port=7860, share=False)
 
 
 
 
1
import asyncio

import gradio as gr
import numpy as np
from transformers import pipeline

import edge_tts
5
 
6
+ # بارگذاری مدل تبدیل گفتار به متن (Whisper small برای فارسی)
7
+ stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")
 
 
 
 
 
8
 
9
+ # بارگذاری مدل چت‌بات (GPT2 تنظیم‌شده برای فارسی)
10
+ chatbot = pipeline("text-generation", model="HooshvareLab/gpt2-fa")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ # تابع تبدیل متن به گفتار با استفاده از edge-tts
13
+ def tts(text, voice="fa-IR-FaridNeural"):
14
+ communicate = edge_tts.Communicate(text, voice)
15
+ audio_data = b"".join([chunk["data"] for chunk in communicate.stream() if chunk["type"] == "audio"])
16
+ audio_array = np.frombuffer(audio_data, dtype=np.int16)
17
+ sample_rate = 24000 # طبق مستندات edge-tts
18
+ return sample_rate, audio_array
 
 
 
 
 
 
 
19
 
20
+ # تابع اصلی: خط لوله صوتی به صوتی
21
+ def audio_to_audio(audio_input):
22
+ sample_rate_in, data_in = audio_input
23
+ audio = {"array": data_in, "sampling_rate": sample_rate_in}
24
+
25
+ # مرحله 1: تبدیل گفتار به متن
26
+ text = stt(audio)["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
+ # مرحله 2: تولید پاسخ چت‌بات
29
+ response = chatbot(text, max_length=50, num_return_sequences=1)[0]["generated_text"]
30
+
31
+ # مرحله 3: تبدیل متن به گفتار
32
+ sample_rate_out, data_out = tts(response)
33
+
34
+ return (sample_rate_out, data_out)
35
+
36
+ # رابط کاربری Gradio
37
+ demo = gr.Interface(
38
+ fn=audio_to_audio,
39
+ inputs=gr.Audio(source="microphone", type="numpy"),
40
+ outputs=gr.Audio(type="numpy"),
41
+ title="چت‌بات صوتی فارسی",
42
+ description="به فارسی صحبت کنید و برنامه به فارسی پاسخ می‌دهد."
43
+ )
44
 
45
+ # اجرای برنامه
46
+ demo.launch()