import asyncio
import os
import tempfile
from typing import Dict, Optional

import edge_tts
import gradio as gr
async def generate_speech(text: str, voice: str) -> Optional[str]:
    """Synthesize `text` with the given edge-tts voice and save it as an MP3.

    Args:
        text: The text to speak.
        voice: An edge-tts voice identifier, e.g. "en-US-JennyNeural".

    Returns:
        Path to the generated MP3 file, or None if synthesis failed.
    """
    try:
        communicate = edge_tts.Communicate(text, voice)
        # Use a unique temp file instead of the fixed "output.mp3" so
        # concurrent requests cannot clobber each other's audio.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            output_file = tmp.name
        await communicate.save(output_file)
        return output_file
    except Exception as e:
        # Best-effort: report the failure and signal it to the caller via None.
        print(f"Error: {e}")
        return None
# Display label -> edge-tts neural voice identifier, grouped by language/region.
# Labels are what the UI dropdown shows; insertion order controls dropdown order.
VOICES: Dict[str, str] = {
    # English (US)
    "Jenny (Female, US)": "en-US-JennyNeural",
    "Guy (Male, US)": "en-US-GuyNeural",
    "Aria (Female, US)": "en-US-AriaNeural",
    "Davis (Male, US)": "en-US-DavisNeural",
    "Jane (Female, US)": "en-US-JaneNeural",
    "Jason (Male, US)": "en-US-JasonNeural",
    "Nancy (Female, US)": "en-US-NancyNeural",
    "Tony (Male, US)": "en-US-TonyNeural",
    "Sara (Female, US)": "en-US-SaraNeural",
    "Brandon (Male, US)": "en-US-BrandonNeural",
    # English (UK)
    "Libby (Female, UK)": "en-GB-LibbyNeural",
    "Ryan (Male, UK)": "en-GB-RyanNeural",
    "Sonia (Female, UK)": "en-GB-SoniaNeural",
    "Thomas (Male, UK)": "en-GB-ThomasNeural",
    # English (Australia)
    "Natasha (Female, AU)": "en-AU-NatashaNeural",
    "William (Male, AU)": "en-AU-WilliamNeural",
    # English (India)
    "Neerja (Female, IN)": "en-IN-NeerjaNeural",
    "Prabhat (Male, IN)": "en-IN-PrabhatNeural",
    # Spanish
    "Elvira (Female, ES)": "es-ES-ElviraNeural",
    "Alvaro (Male, ES)": "es-ES-AlvaroNeural",
    "Ana (Female, AR)": "es-AR-AnaNeural",
    "Tomas (Male, AR)": "es-AR-TomasNeural",
    "Camila (Female, MX)": "es-MX-CamilaNeural",
    "Jorge (Male, MX)": "es-MX-JorgeNeural",
    # French
    "Denise (Female, FR)": "fr-FR-DeniseNeural",
    "Henri (Male, FR)": "fr-FR-HenriNeural",
    "Sylvie (Female, CA)": "fr-CA-SylvieNeural",
    "Antoine (Male, CA)": "fr-CA-AntoineNeural",
    # German
    "Katja (Female, DE)": "de-DE-KatjaNeural",
    "Conrad (Male, DE)": "de-DE-ConradNeural",
    "Amala (Female, CH)": "de-CH-AmalaNeural",
    "Jan (Male, CH)": "de-CH-JanNeural",
    # Italian
    "Elsa (Female, IT)": "it-IT-ElsaNeural",
    "Diego (Male, IT)": "it-IT-DiegoNeural",
    # Portuguese
    "Francisca (Female, PT)": "pt-PT-FranciscaNeural",
    "Duarte (Male, PT)": "pt-PT-DuarteNeural",
    "Yara (Female, BR)": "pt-BR-YaraNeural",
    "Antonio (Male, BR)": "pt-BR-AntonioNeural",
    # Japanese
    "Nanami (Female, JP)": "ja-JP-NanamiNeural",
    "Keita (Male, JP)": "ja-JP-KeitaNeural",
    # Chinese
    "Xiaoxiao (Female, CN)": "zh-CN-XiaoxiaoNeural",
    "Yunyang (Male, CN)": "zh-CN-YunyangNeural",
    "HsiaoChen (Female, TW)": "zh-TW-HsiaoChenNeural",
    "YunJhe (Male, TW)": "zh-TW-YunJheNeural",
    # Korean
    "SunHi (Female, KR)": "ko-KR-SunHiNeural",
    "InJoon (Male, KR)": "ko-KR-InJoonNeural",
    # Russian
    "Svetlana (Female, RU)": "ru-RU-SvetlanaNeural",
    "Dmitry (Male, RU)": "ru-RU-DmitryNeural",
    # Arabic
    "Salma (Female, EG)": "ar-EG-SalmaNeural",
    "Shakir (Male, EG)": "ar-EG-ShakirNeural",
    "Hamed (Male, SA)": "ar-SA-HamedNeural",
    # Hindi
    "Swara (Female, IN)": "hi-IN-SwaraNeural",
    "Madhur (Male, IN)": "hi-IN-MadhurNeural",
    # Others (Belgian French, Dutch)
    "Brigitte (Female, BE)": "fr-BE-BrigitteNeural",
    "Gerard (Male, BE)": "fr-BE-GerardNeural",
    "Finn (Male, NL)": "nl-NL-FinnNeural",
    "Maarten (Male, NL)": "nl-NL-MaartenNeural",
    "Sofie (Female, NL)": "nl-NL-SofieNeural",
}
def text_to_speech(text: str, voice: str) -> tuple:
    """Run the async TTS generator and compute the download-button update.

    Args:
        text: Text to synthesize; blank input short-circuits.
        voice: Display label from VOICES; unknown labels fall back to Jenny.

    Returns:
        (audio_path_or_None, update_dict) — the dict toggles the download
        button's visibility.
    """
    if not text or not voice:
        return None, {"visible": False}
    # Unknown voice labels fall back to the default US voice.
    voice_id = VOICES.get(voice, VOICES["Jenny (Female, US)"])
    output_file = asyncio.run(generate_speech(text, voice_id))
    # BUG FIX: the original `return a, X if cond else (None, Y)` applied the
    # conditional only to the *second* tuple element, so the failure branch
    # returned (None, (None, {...})) — a nested tuple instead of two outputs.
    if output_file:
        # NOTE(review): consider also including "value": output_file so the
        # DownloadButton has a file to serve — confirm against Gradio version.
        return output_file, {"visible": True}
    return None, {"visible": False}
# --- Gradio UI: declarative layout + event wiring ---
with gr.Blocks(title="Multi-Voice Text-to-Speech", theme="soft") as demo:
    gr.Markdown("""
# 🎤 Advanced Text-to-Speech Converter
### With 100+ Voices
""")
    with gr.Row():
        with gr.Column():
            # Input side: text area, voice picker, and trigger button.
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type or paste your text here...",
                lines=5,
                max_lines=10
            )
            voice_dropdown = gr.Dropdown(
                choices=list(VOICES.keys()),
                label="Select Voice",
                value="Jenny (Female, US)"
            )
            generate_btn = gr.Button("Generate Speech", variant="primary")
        with gr.Column():
            # Output side: autoplaying audio preview plus a download button
            # that starts hidden and is revealed once audio exists.
            audio_output = gr.Audio(label="Generated Speech", autoplay=True)
            download_btn = gr.DownloadButton(
                label="Download Audio",
                visible=False
            )
    # Interactive components
    # Clicking Generate runs text_to_speech; its tuple fills the audio player
    # and the download button's update dict.
    generate_btn.click(
        fn=text_to_speech,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, download_btn]
    )
    # Keep the download button's visibility in sync with whether audio exists.
    # NOTE(review): this update dict never sets "value", so the button may have
    # no file to serve — confirm against the Gradio version in use.
    audio_output.change(
        fn=lambda x: {"visible": bool(x)},
        inputs=audio_output,
        outputs=download_btn
    )
if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link; drop it for local-only use.
    demo.launch(share=True)