# Text_To_Voice / app.py
# Last change: "Update app.py" by hamza2923 (commit 3c38334, verified), 6.34 kB
import asyncio
import os
import tempfile
from typing import Dict, Optional

import edge_tts
import gradio as gr
async def generate_speech(text: str, voice: str) -> Optional[str]:
    """Synthesize ``text`` with edge-tts and return the path of the MP3 file.

    Args:
        text: The text to speak. Empty or whitespace-only text is rejected
            without contacting the speech service.
        voice: The voice identifier handed to ``edge_tts.Communicate``
            (for example ``"en-US-JennyNeural"``).

    Returns:
        The path of a freshly created ``.mp3`` file, or ``None`` when there
        is nothing to synthesize or synthesis failed (the error is printed).
    """
    # Nothing to speak: skip the request instead of letting it fail.
    if not text or not text.strip():
        return None

    # Each call gets its own file. A fixed "output.mp3" would be overwritten
    # whenever two requests are served at the same time.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        output_file = handle.name

    try:
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(output_file)
    except Exception as e:
        # Best-effort boundary: report the problem and signal failure with
        # None so the UI can simply show no audio.
        print(f"Error: {e}")
        # Do not leave an empty or partially written file behind.
        try:
            os.remove(output_file)
        except OSError:
            pass
        return None
    return output_file
# Voice catalogue offered in the UI, grouped by language.
#
# Keys are the display labels shown in the voice dropdown, written as
# "Name (Gender, REGION)"; values are the voice identifiers passed to edge-tts.
# get_languages() and filter_voices() parse the parenthesised part of the key,
# so new entries must keep that label format.
# NOTE(review): whether every identifier below is actually served by edge-tts
# is not checked anywhere in this file — confirm against the edge-tts voice list.
VOICES: Dict[str, str] = {
    # English (US)
    "Jenny (Female, US)": "en-US-JennyNeural",
    "Guy (Male, US)": "en-US-GuyNeural",
    "Aria (Female, US)": "en-US-AriaNeural",
    "Davis (Male, US)": "en-US-DavisNeural",
    "Jane (Female, US)": "en-US-JaneNeural",
    "Jason (Male, US)": "en-US-JasonNeural",
    "Nancy (Female, US)": "en-US-NancyNeural",
    "Tony (Male, US)": "en-US-TonyNeural",
    "Sara (Female, US)": "en-US-SaraNeural",
    "Brandon (Male, US)": "en-US-BrandonNeural",
    # English (UK)
    "Libby (Female, UK)": "en-GB-LibbyNeural",
    "Ryan (Male, UK)": "en-GB-RyanNeural",
    "Sonia (Female, UK)": "en-GB-SoniaNeural",
    "Thomas (Male, UK)": "en-GB-ThomasNeural",
    # English (Australia)
    "Natasha (Female, AU)": "en-AU-NatashaNeural",
    "William (Male, AU)": "en-AU-WilliamNeural",
    # English (India)
    "Neerja (Female, IN)": "en-IN-NeerjaNeural",
    "Prabhat (Male, IN)": "en-IN-PrabhatNeural",
    # Spanish (Spain, Argentina, Mexico)
    "Elvira (Female, ES)": "es-ES-ElviraNeural",
    "Alvaro (Male, ES)": "es-ES-AlvaroNeural",
    "Ana (Female, AR)": "es-AR-AnaNeural",
    "Tomas (Male, AR)": "es-AR-TomasNeural",
    "Camila (Female, MX)": "es-MX-CamilaNeural",
    "Jorge (Male, MX)": "es-MX-JorgeNeural",
    # French (France, Canada)
    "Denise (Female, FR)": "fr-FR-DeniseNeural",
    "Henri (Male, FR)": "fr-FR-HenriNeural",
    "Sylvie (Female, CA)": "fr-CA-SylvieNeural",
    "Antoine (Male, CA)": "fr-CA-AntoineNeural",
    # German (Germany, Switzerland)
    "Katja (Female, DE)": "de-DE-KatjaNeural",
    "Conrad (Male, DE)": "de-DE-ConradNeural",
    "Amala (Female, CH)": "de-CH-AmalaNeural",
    "Jan (Male, CH)": "de-CH-JanNeural",
    # Italian
    "Elsa (Female, IT)": "it-IT-ElsaNeural",
    "Diego (Male, IT)": "it-IT-DiegoNeural",
    # Portuguese (Portugal, Brazil)
    "Francisca (Female, PT)": "pt-PT-FranciscaNeural",
    "Duarte (Male, PT)": "pt-PT-DuarteNeural",
    "Yara (Female, BR)": "pt-BR-YaraNeural",
    "Antonio (Male, BR)": "pt-BR-AntonioNeural",
    # Japanese
    "Nanami (Female, JP)": "ja-JP-NanamiNeural",
    "Keita (Male, JP)": "ja-JP-KeitaNeural",
    # Chinese (Mainland, Taiwan)
    "Xiaoxiao (Female, CN)": "zh-CN-XiaoxiaoNeural",
    "Yunyang (Male, CN)": "zh-CN-YunyangNeural",
    "HsiaoChen (Female, TW)": "zh-TW-HsiaoChenNeural",
    "YunJhe (Male, TW)": "zh-TW-YunJheNeural",
    # Korean
    "SunHi (Female, KR)": "ko-KR-SunHiNeural",
    "InJoon (Male, KR)": "ko-KR-InJoonNeural",
    # Russian
    "Svetlana (Female, RU)": "ru-RU-SvetlanaNeural",
    "Dmitry (Male, RU)": "ru-RU-DmitryNeural",
    # Arabic (Egypt, Saudi Arabia)
    "Salma (Female, EG)": "ar-EG-SalmaNeural",
    "Shakir (Male, EG)": "ar-EG-ShakirNeural",
    "Hamed (Male, SA)": "ar-SA-HamedNeural",
    # Hindi
    # These share the "IN" region tag with the English (India) voices above,
    # so they land under the same filter entries.
    "Swara (Female, IN)": "hi-IN-SwaraNeural",
    "Madhur (Male, IN)": "hi-IN-MadhurNeural",
    # French (Belgium)
    "Brigitte (Female, BE)": "fr-BE-BrigitteNeural",
    "Gerard (Male, BE)": "fr-BE-GerardNeural",
    # Dutch (Netherlands)
    "Finn (Male, NL)": "nl-NL-FinnNeural",
    "Maarten (Male, NL)": "nl-NL-MaartenNeural",
    "Sofie (Female, NL)": "nl-NL-SofieNeural",
}
def text_to_speech(text: str, voice: str) -> Optional[str]:
    """Synchronous entry point that runs ``generate_speech`` to completion.

    Args:
        text: The text to speak.
        voice: Either a display label from ``VOICES`` (what the voice
            dropdown supplies, e.g. ``"Jenny (Female, US)"``) or a raw voice
            identifier (e.g. ``"en-US-JennyNeural"``).

    Returns:
        Whatever ``generate_speech`` returns: the path of the generated audio
        file, or ``None`` on failure.
    """
    # The dropdown hands over the human-readable label, but the speech service
    # needs the identifier stored as the value in VOICES. Unknown strings are
    # passed through unchanged so callers may still supply an identifier.
    voice_id = VOICES.get(voice, voice)
    return asyncio.run(generate_speech(text, voice_id))
def get_languages() -> list:
    """Return the sorted, de-duplicated filter tags found in the voice labels.

    A tag is the text between the first "(" and the following ")" of a
    ``VOICES`` key, e.g. "Female, US" for "Jenny (Female, US)". Labels lacking
    either parenthesis contribute nothing.
    """
    tags = {
        label.split('(')[1].split(')')[0]
        for label in VOICES
        if '(' in label and ')' in label
    }
    return sorted(tags)
def filter_voices(language: str) -> Dict[str, str]:
    """Return the subset of ``VOICES`` whose label carries the given tag.

    An empty/missing selection or the special value "All" yields the complete
    ``VOICES`` mapping itself. Otherwise a label matches when it contains the
    tag wrapped in parentheses, e.g. "(Female, US)".
    """
    if not language:
        return VOICES
    if language == "All":
        return VOICES

    wanted = f"({language})"
    selected = {}
    for label, voice_id in VOICES.items():
        if wanted in label:
            selected[label] = voice_id
    return selected
def update_voice_dropdown(language: str) -> Dict:
    """Build the update for the voice dropdown after the filter changes.

    Args:
        language: The value selected in the language filter dropdown.

    Returns:
        An update that replaces the dropdown's choices with the labels
        matching ``language`` and selects the first of them, or selects
        nothing when no label matches.
    """
    labels = list(filter_voices(language))
    # gr.update() is used instead of gr.Dropdown.update(): the per-component
    # update() classmethods are not available in Gradio 4+, which this app
    # needs anyway because it uses gr.DownloadButton.
    return gr.update(
        choices=labels,
        value=labels[0] if labels else None,
    )
# ---------------------------------------------------------------------------
# UI definition: text box, language filter and voice picker on the left,
# audio player and download button on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Multi-Voice Text-to-Speech", theme="soft") as demo:
    # NOTE(review): the heading advertises "100+ Voices" while VOICES lists
    # fewer entries; the text is left unchanged here.
    gr.Markdown("""
# 🎤 Advanced Text-to-Speech Converter
### With 100+ Voices Across Multiple Languages
""")

    with gr.Row():
        # Input side.
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type or paste your text here...",
                lines=5,
                max_lines=10,
            )
            with gr.Row():
                language_filter = gr.Dropdown(
                    ["All"] + get_languages(),
                    label="Filter by Language",
                    value="All",
                )
                voice_dropdown = gr.Dropdown(
                    list(VOICES.keys()),
                    label="Select Voice",
                    value="Jenny (Female, US)",
                )
            generate_btn = gr.Button("Generate Speech", variant="primary")

        # Output side.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech", autoplay=True)
            # DownloadButton has no `file_name` parameter; the file to offer
            # is supplied through `value` once audio has been generated.
            download_btn = gr.DownloadButton(
                "Download Audio",
                visible=False,
            )

    def _synthesize(text: str, voice: str):
        """Generate speech and return one value per declared output.

        Returns the audio path for the player plus an update that points the
        download button at the same file and shows it only on success.
        """
        audio_path = text_to_speech(text, voice)
        return audio_path, gr.update(
            value=audio_path,
            visible=audio_path is not None,
        )

    # Narrow the voice list whenever the filter changes.
    language_filter.change(
        update_voice_dropdown,
        inputs=language_filter,
        outputs=voice_dropdown,
    )

    # The handler must return exactly as many values as there are outputs.
    # The download button's visibility is decided here, so no separate
    # audio_output.change hook is needed (that hook would also have revealed
    # the button when synthesis failed and the player was cleared).
    generate_btn.click(
        _synthesize,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, download_btn],
    )

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()