# Hugging Face Spaces page header (status: Running) - scrape residue, not part of the program.
import asyncio
import os
import tempfile
from typing import Dict, Optional

import edge_tts
import gradio as gr
async def generate_speech(text: str, voice: str) -> Optional[str]:
    """Synthesize *text* with an edge-tts voice and save it as an MP3 file.

    Every call writes to its own temporary file. A single fixed file name
    would be shared by all concurrent requests, so one user's audio could be
    overwritten by another's while it is still being served.

    Args:
        text: Text to speak. Empty or whitespace-only text is rejected
            without contacting the speech service.
        voice: Voice identifier handed to ``edge_tts.Communicate``
            (e.g. ``"en-US-JennyNeural"``).

    Returns:
        Path of the generated MP3 file, or ``None`` when the text is blank
        or synthesis failed. The caller owns the returned file; it is not
        deleted automatically.
    """
    if not text or not text.strip():
        return None

    # Reserve a unique path, then close the handle so the library can
    # reopen the file for writing on every platform.
    with tempfile.NamedTemporaryFile(
        prefix="tts_", suffix=".mp3", delete=False
    ) as tmp:
        output_file = tmp.name

    try:
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(output_file)
    except Exception as e:
        # Boundary handler: the UI expects None on any failure, not a crash.
        print(f"Error: {e}")
        try:
            # Best-effort cleanup of the empty/partial file.
            os.remove(output_file)
        except OSError:
            pass
        return None
    return output_file
# Voice catalogue: maps the display label shown in the voice dropdown
# ("Name (Gender, Region)") to the voice identifier string.
# Insertion order matters: the dropdown lists the keys in this order, and the
# first key of a filtered subset becomes the pre-selected voice.
# NOTE(review): availability of each identifier in the speech service has not
# been verified here - confirm against the edge-tts voice list.
VOICES: Dict[str, str] = {
    # English (US)
    "Jenny (Female, US)": "en-US-JennyNeural",
    "Guy (Male, US)": "en-US-GuyNeural",
    "Aria (Female, US)": "en-US-AriaNeural",
    "Davis (Male, US)": "en-US-DavisNeural",
    "Jane (Female, US)": "en-US-JaneNeural",
    "Jason (Male, US)": "en-US-JasonNeural",
    "Nancy (Female, US)": "en-US-NancyNeural",
    "Tony (Male, US)": "en-US-TonyNeural",
    "Sara (Female, US)": "en-US-SaraNeural",
    "Brandon (Male, US)": "en-US-BrandonNeural",
    # English (UK)
    "Libby (Female, UK)": "en-GB-LibbyNeural",
    "Ryan (Male, UK)": "en-GB-RyanNeural",
    "Sonia (Female, UK)": "en-GB-SoniaNeural",
    "Thomas (Male, UK)": "en-GB-ThomasNeural",
    # English (Australia)
    "Natasha (Female, AU)": "en-AU-NatashaNeural",
    "William (Male, AU)": "en-AU-WilliamNeural",
    # English (India)
    "Neerja (Female, IN)": "en-IN-NeerjaNeural",
    "Prabhat (Male, IN)": "en-IN-PrabhatNeural",
    # Spanish (Spain, Argentina, Mexico)
    "Elvira (Female, ES)": "es-ES-ElviraNeural",
    "Alvaro (Male, ES)": "es-ES-AlvaroNeural",
    "Ana (Female, AR)": "es-AR-AnaNeural",
    "Tomas (Male, AR)": "es-AR-TomasNeural",
    "Camila (Female, MX)": "es-MX-CamilaNeural",
    "Jorge (Male, MX)": "es-MX-JorgeNeural",
    # French (France, Canada)
    "Denise (Female, FR)": "fr-FR-DeniseNeural",
    "Henri (Male, FR)": "fr-FR-HenriNeural",
    "Sylvie (Female, CA)": "fr-CA-SylvieNeural",
    "Antoine (Male, CA)": "fr-CA-AntoineNeural",
    # German (Germany, Switzerland)
    "Katja (Female, DE)": "de-DE-KatjaNeural",
    "Conrad (Male, DE)": "de-DE-ConradNeural",
    "Amala (Female, CH)": "de-CH-AmalaNeural",
    "Jan (Male, CH)": "de-CH-JanNeural",
    # Italian
    "Elsa (Female, IT)": "it-IT-ElsaNeural",
    "Diego (Male, IT)": "it-IT-DiegoNeural",
    # Portuguese (Portugal, Brazil)
    "Francisca (Female, PT)": "pt-PT-FranciscaNeural",
    "Duarte (Male, PT)": "pt-PT-DuarteNeural",
    "Yara (Female, BR)": "pt-BR-YaraNeural",
    "Antonio (Male, BR)": "pt-BR-AntonioNeural",
    # Japanese
    "Nanami (Female, JP)": "ja-JP-NanamiNeural",
    "Keita (Male, JP)": "ja-JP-KeitaNeural",
    # Chinese (Mainland, Taiwan)
    "Xiaoxiao (Female, CN)": "zh-CN-XiaoxiaoNeural",
    "Yunyang (Male, CN)": "zh-CN-YunyangNeural",
    "HsiaoChen (Female, TW)": "zh-TW-HsiaoChenNeural",
    "YunJhe (Male, TW)": "zh-TW-YunJheNeural",
    # Korean
    "SunHi (Female, KR)": "ko-KR-SunHiNeural",
    "InJoon (Male, KR)": "ko-KR-InJoonNeural",
    # Russian
    "Svetlana (Female, RU)": "ru-RU-SvetlanaNeural",
    "Dmitry (Male, RU)": "ru-RU-DmitryNeural",
    # Arabic (Egypt, Saudi Arabia)
    "Salma (Female, EG)": "ar-EG-SalmaNeural",
    "Shakir (Male, EG)": "ar-EG-ShakirNeural",
    "Hamed (Male, SA)": "ar-SA-HamedNeural",
    # Hindi (shares the "IN" region tag with the English (India) voices)
    "Swara (Female, IN)": "hi-IN-SwaraNeural",
    "Madhur (Male, IN)": "hi-IN-MadhurNeural",
    # Additional voices: French (Belgium) and Dutch (Netherlands)
    "Brigitte (Female, BE)": "fr-BE-BrigitteNeural",
    "Gerard (Male, BE)": "fr-BE-GerardNeural",
    "Finn (Male, NL)": "nl-NL-FinnNeural",
    "Maarten (Male, NL)": "nl-NL-MaartenNeural",
    "Sofie (Female, NL)": "nl-NL-SofieNeural",
}
def text_to_speech(text: str, voice: str) -> Optional[str]:
    """Synchronous entry point around the async ``generate_speech`` coroutine.

    The voice dropdown supplies the display label (a key of ``VOICES`` such
    as ``"Jenny (Female, US)"``), not a voice identifier. The label is
    resolved to its identifier here; passing the label straight through
    makes the speech library reject the voice.

    Args:
        text: Text to synthesize.
        voice: Either a display label from ``VOICES`` or a raw voice
            identifier (e.g. ``"en-US-JennyNeural"``); unknown values are
            passed through unchanged.

    Returns:
        Whatever ``generate_speech`` returns: the audio file path, or
        ``None`` when generation failed.
    """
    voice_id = VOICES.get(voice, voice)
    # asyncio.run creates and tears down its own event loop, so this must be
    # called from a thread that is not already running one.
    return asyncio.run(generate_speech(text, voice_id))
def _language_code(voice_id: str) -> str:
    """Return the language subtag of a voice id ("en-US-JennyNeural" -> "en")."""
    return voice_id.split("-", 1)[0]


def get_languages() -> list:
    """Return the sorted, de-duplicated language codes covered by ``VOICES``.

    The code is taken from the voice identifier (the part before the first
    ``-``). The parenthesised part of the display label is not usable for
    this: it holds "gender, region" (e.g. ``"Female, US"``), so extracting
    it yields one entry per gender/region pair instead of per language.

    Returns:
        Language codes such as ``["ar", "de", "en", ...]``.
    """
    return sorted({_language_code(voice_id) for voice_id in VOICES.values()})


def filter_voices(language: str) -> Dict[str, str]:
    """Return the voices that belong to *language*.

    Args:
        language: A language code as returned by ``get_languages`` (e.g.
            ``"en"``). An empty value or ``"All"`` selects every voice. For
            backward compatibility a full label tag such as
            ``"Female, US"`` is still matched against ``"(Female, US)"`` in
            the display label.

    Returns:
        A new ``{display label: voice id}`` dict in catalogue order. It is
        always a copy, so callers cannot mutate ``VOICES`` through it.
    """
    if not language or language == "All":
        return dict(VOICES)

    legacy_tag = f"({language})"
    return {
        name: voice_id
        for name, voice_id in VOICES.items()
        if _language_code(voice_id) == language or legacy_tag in name
    }
def update_voice_dropdown(language: str) -> Dict:
    """Build the update for the voice dropdown after the filter changes.

    Uses ``gr.update`` rather than the per-component ``Dropdown.update``
    classmethod, which no longer exists in the Gradio releases that provide
    ``gr.DownloadButton`` (used elsewhere in this file).

    Args:
        language: Value of the language filter, forwarded to
            ``filter_voices``.

    Returns:
        An update that replaces the dropdown choices with the matching voice
        labels and selects the first one, or clears the selection when
        nothing matches.
    """
    voice_names = list(filter_voices(language))
    return gr.update(
        choices=voice_names,
        value=voice_names[0] if voice_names else None,
    )
def _generate_for_ui(text: str, voice: str):
    """Click handler: synthesize speech and feed both output components.

    The button is wired to two outputs, so exactly two values are returned.

    Returns:
        ``(audio, download_update)``: the generated file path (``None`` on
        failure) for the audio player, and an update that points the download
        button at the same file and shows it only when a file exists.
    """
    audio_path = text_to_speech(text, voice)
    return audio_path, gr.update(value=audio_path, visible=bool(audio_path))


with gr.Blocks(title="Multi-Voice Text-to-Speech", theme="soft") as demo:
    # The voice count is computed from the catalogue so the heading cannot
    # drift from what is actually offered.
    gr.Markdown(f"""
    # 🎤 Advanced Text-to-Speech Converter
    ### With {len(VOICES)} Voices Across Multiple Languages
    """)

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type or paste your text here...",
                lines=5,
                max_lines=10,
            )
            with gr.Row():
                language_filter = gr.Dropdown(
                    ["All"] + get_languages(),
                    label="Filter by Language",
                    value="All",
                )
                voice_dropdown = gr.Dropdown(
                    list(VOICES.keys()),
                    label="Select Voice",
                    value="Jenny (Female, US)",
                )
            generate_btn = gr.Button("Generate Speech", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech", autoplay=True)
            # DownloadButton takes the file to serve through `value`; it has
            # no `file_name` argument. Hidden until a file has been generated.
            download_btn = gr.DownloadButton(
                "Download Audio",
                visible=False,
            )

    # Interactive components
    language_filter.change(
        update_voice_dropdown,
        inputs=language_filter,
        outputs=voice_dropdown,
    )
    # One handler updates both outputs, so no separate listener on the audio
    # component is needed to reveal the download button.
    generate_btn.click(
        _generate_for_ui,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, download_btn],
    )

if __name__ == "__main__":
    demo.launch()