Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,136 +1,46 @@
|
|
1 |
-
import asyncio
|
2 |
-
import os
|
3 |
-
import edge_tts
|
4 |
import gradio as gr
|
5 |
-
from
|
|
|
|
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
-
try:
|
10 |
-
voices = await edge_tts.list_voices()
|
11 |
-
return sorted([f"{voice['ShortName']} ({voice['Gender']})" for voice in voices])
|
12 |
-
except Exception as e:
|
13 |
-
return [f"Error fetching voices: {str(e)}"]
|
14 |
|
15 |
-
#
|
16 |
-
|
17 |
-
try:
|
18 |
-
if not text or not voice:
|
19 |
-
return None, "Error: Text and voice selection are required."
|
20 |
-
|
21 |
-
# Extract voice ShortName (e.g., "en-US-AvaNeural (Female)" -> "en-US-AvaNeural")
|
22 |
-
voice_short_name = voice.split(" (")[0]
|
23 |
-
|
24 |
-
# Convert rate to edge-tts format (e.g., 10 -> "+10%", -10 -> "-10%")
|
25 |
-
rate_str = f"+{int(rate)}%" if rate >= 0 else f"{int(rate)}%"
|
26 |
-
|
27 |
-
# Convert pitch to edge-tts format (e.g., 100 -> "+100Hz", -100 -> "-100Hz")
|
28 |
-
pitch_str = f"+{int(pitch)}Hz" if pitch >= 0 else f"{int(pitch)}Hz"
|
29 |
-
|
30 |
-
# Generate unique output filename with timestamp
|
31 |
-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
32 |
-
output_file = f"output_{timestamp}.mp3"
|
33 |
-
|
34 |
-
# Initialize edge-tts communication
|
35 |
-
communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
|
36 |
-
|
37 |
-
# Save the audio
|
38 |
-
await communicate.save(output_file)
|
39 |
-
|
40 |
-
# Check if file was created
|
41 |
-
if os.path.exists(output_file):
|
42 |
-
return output_file, "Audio generated successfully!"
|
43 |
-
else:
|
44 |
-
return None, "Error: Audio file was not generated."
|
45 |
-
except Exception as e:
|
46 |
-
return None, f"Error: {str(e)}"
|
47 |
|
48 |
-
#
|
49 |
-
def
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
css = """
|
56 |
-
.gradio-container {background-color: #f5f7fa;}
|
57 |
-
.title {text-align: center; color: #2c3e50;}
|
58 |
-
.footer {text-align: center; color: #7f8c8d; font-size: 0.9em; margin-top: 20px;}
|
59 |
-
.button-primary {background-color: #3498db !important; color: white !important;}
|
60 |
-
.input-box {border-radius: 8px;}
|
61 |
-
"""
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
)
|
71 |
-
|
72 |
-
with gr.Row():
|
73 |
-
with gr.Column(scale=2):
|
74 |
-
text_input = gr.Textbox(
|
75 |
-
label="Input Text",
|
76 |
-
placeholder="Enter the text you want to convert to speech...",
|
77 |
-
lines=5,
|
78 |
-
elem_classes="input-box"
|
79 |
-
)
|
80 |
-
voice_dropdown = gr.Dropdown(
|
81 |
-
choices=voices,
|
82 |
-
label="Voice Model",
|
83 |
-
value=voices[0] if voices else None,
|
84 |
-
allow_custom_value=False
|
85 |
-
)
|
86 |
-
rate_slider = gr.Slider(
|
87 |
-
minimum=-50,
|
88 |
-
maximum=50,
|
89 |
-
value=0,
|
90 |
-
step=1,
|
91 |
-
label="Speech Rate (%)",
|
92 |
-
info="Adjust the speed of the speech (±50%)"
|
93 |
-
)
|
94 |
-
pitch_slider = gr.Slider(
|
95 |
-
minimum=-200,
|
96 |
-
maximum=200,
|
97 |
-
value=0,
|
98 |
-
step=10,
|
99 |
-
label="Pitch (Hz)",
|
100 |
-
info="Adjust the pitch of the voice (±200Hz)"
|
101 |
-
)
|
102 |
-
generate_button = gr.Button("Generate Audio", variant="primary", elem_classes="button-primary")
|
103 |
-
|
104 |
-
with gr.Column(scale=1):
|
105 |
-
audio_output = gr.Audio(label="Generated Audio", interactive=False)
|
106 |
-
status_output = gr.Textbox(
|
107 |
-
label="Status",
|
108 |
-
interactive=False,
|
109 |
-
placeholder="Status messages will appear here..."
|
110 |
-
)
|
111 |
-
|
112 |
-
# Button click event
|
113 |
-
async def on_generate(text, voice, rate, pitch):
|
114 |
-
audio, status = await text_to_speech(text, voice, rate, pitch)
|
115 |
-
return audio, status
|
116 |
-
|
117 |
-
generate_button.click(
|
118 |
-
fn=on_generate,
|
119 |
-
inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
|
120 |
-
outputs=[audio_output, status_output]
|
121 |
-
)
|
122 |
-
|
123 |
-
gr.Markdown(
|
124 |
-
"""
|
125 |
-
<p class='footer'>
|
126 |
-
Powered by Edge TTS and Gradio | Deployed on Hugging Face Spaces
|
127 |
-
</p>
|
128 |
-
"""
|
129 |
-
)
|
130 |
|
131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
-
#
|
134 |
-
|
135 |
-
interface = create_gradio_interface()
|
136 |
-
interface.launch(server_name="0.0.0.0", server_port=7860, share=False)
|
|
|
|
|
|
|
|
|
import asyncio

import edge_tts
import gradio as gr
import numpy as np
from transformers import pipeline
5 |
|
6 |
+
# Load the speech-to-text model (Whisper small; multilingual, used here for Persian).
stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")

# Load the chatbot model (GPT-2 fine-tuned for Persian by HooshvareLab).
chatbot = pipeline("text-generation", model="HooshvareLab/gpt2-fa")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
+
# Text-to-speech via edge-tts.
def tts(text, voice="fa-IR-FaridNeural"):
    """Synthesize *text* with the given edge-tts voice.

    Returns:
        tuple[int, np.ndarray]: (sample_rate, int16 audio samples), the
        ``type="numpy"`` format expected by ``gr.Audio``.
    """
    # edge_tts.Communicate.stream() is an *async* generator — the original
    # synchronous list comprehension over it raises TypeError. Collect the
    # audio chunks inside a coroutine and drive it with asyncio.run (safe
    # here: Gradio invokes sync handlers in a thread with no running loop).
    async def _collect_audio() -> bytes:
        communicate = edge_tts.Communicate(text, voice)
        chunks = []
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                chunks.append(chunk["data"])
        return b"".join(chunks)

    audio_data = asyncio.run(_collect_audio())
    # NOTE(review): edge-tts streams MP3-encoded bytes; reinterpreting them as
    # raw int16 PCM below yields noise, not speech. A proper fix decodes the
    # MP3 first (ffmpeg/pydub) — left as-is pending that dependency. Confirm.
    audio_array = np.frombuffer(audio_data, dtype=np.int16)
    sample_rate = 24000  # edge-tts default output rate — TODO confirm against its docs
    return sample_rate, audio_array
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
+
# Main handler: the full speech-to-speech pipeline.
def audio_to_audio(audio_input):
    """Transcribe the recorded audio, generate a chatbot reply, speak it back.

    Args:
        audio_input: (sample_rate, samples) tuple from ``gr.Audio(type="numpy")``.

    Returns:
        tuple: (sample_rate, samples) of the synthesized reply, as produced by ``tts``.
    """
    in_rate, in_samples = audio_input

    # Step 1: speech -> text (Whisper pipeline takes an array + sampling-rate dict).
    transcript = stt({"array": in_samples, "sampling_rate": in_rate})["text"]

    # Step 2: text -> chatbot reply.
    reply = chatbot(transcript, max_length=50, num_return_sequences=1)[0]["generated_text"]

    # Step 3: reply -> speech.
    return tts(reply)
35 |
+
|
36 |
+
# Gradio UI: record Persian speech from the microphone, play back the synthesized reply.
demo = gr.Interface(
    fn=audio_to_audio,
    # Gradio 4.x renamed `source=` to `sources=[...]`; the old keyword raises TypeError.
    inputs=gr.Audio(sources=["microphone"], type="numpy"),
    outputs=gr.Audio(type="numpy"),
    title="چتبات صوتی فارسی",
    description="به فارسی صحبت کنید و برنامه به فارسی پاسخ میدهد."
)
|
44 |
|
45 |
+
# Entry point: start the Gradio server (blocks until shutdown).
demo.launch()
|
|
|
|