import asyncio
import os
import tempfile
from typing import Dict, Optional

import edge_tts
import gradio as gr
async def generate_speech(text: str, voice: str) -> Optional[str]:
    """Synthesize `text` with the given edge-tts voice and save it as an MP3.

    Args:
        text: The text to speak.
        voice: An edge-tts voice identifier, e.g. "en-US-JennyNeural".

    Returns:
        Path to the generated MP3 file, or None if synthesis failed.
    """
    try:
        communicate = edge_tts.Communicate(text, voice)
        # Use a unique temp file instead of the fixed "output.mp3" so
        # concurrent requests cannot clobber each other's audio.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            output_file = tmp.name
        await communicate.save(output_file)
        return output_file
    except Exception as e:
        # Best-effort: report the failure and signal it to the caller via None.
        print(f"Error: {e}")
        return None
# Display label -> edge-tts neural voice identifier, grouped by language/region.
# Labels are what the UI dropdown shows; insertion order controls dropdown order.
VOICES: Dict[str, str] = {
    # English (US)
    "Jenny (Female, US)": "en-US-JennyNeural",
    "Guy (Male, US)": "en-US-GuyNeural",
    "Aria (Female, US)": "en-US-AriaNeural",
    "Davis (Male, US)": "en-US-DavisNeural",
    "Jane (Female, US)": "en-US-JaneNeural",
    "Jason (Male, US)": "en-US-JasonNeural",
    "Nancy (Female, US)": "en-US-NancyNeural",
    "Tony (Male, US)": "en-US-TonyNeural",
    "Sara (Female, US)": "en-US-SaraNeural",
    "Brandon (Male, US)": "en-US-BrandonNeural",
    # English (UK)
    "Libby (Female, UK)": "en-GB-LibbyNeural",
    "Ryan (Male, UK)": "en-GB-RyanNeural",
    "Sonia (Female, UK)": "en-GB-SoniaNeural",
    "Thomas (Male, UK)": "en-GB-ThomasNeural",
    # English (Australia)
    "Natasha (Female, AU)": "en-AU-NatashaNeural",
    "William (Male, AU)": "en-AU-WilliamNeural",
    # English (India)
    "Neerja (Female, IN)": "en-IN-NeerjaNeural",
    "Prabhat (Male, IN)": "en-IN-PrabhatNeural",
    # Spanish
    "Elvira (Female, ES)": "es-ES-ElviraNeural",
    "Alvaro (Male, ES)": "es-ES-AlvaroNeural",
    "Ana (Female, AR)": "es-AR-AnaNeural",
    "Tomas (Male, AR)": "es-AR-TomasNeural",
    "Camila (Female, MX)": "es-MX-CamilaNeural",
    "Jorge (Male, MX)": "es-MX-JorgeNeural",
    # French
    "Denise (Female, FR)": "fr-FR-DeniseNeural",
    "Henri (Male, FR)": "fr-FR-HenriNeural",
    "Sylvie (Female, CA)": "fr-CA-SylvieNeural",
    "Antoine (Male, CA)": "fr-CA-AntoineNeural",
    # German
    "Katja (Female, DE)": "de-DE-KatjaNeural",
    "Conrad (Male, DE)": "de-DE-ConradNeural",
    "Amala (Female, CH)": "de-CH-AmalaNeural",
    "Jan (Male, CH)": "de-CH-JanNeural",
    # Italian
    "Elsa (Female, IT)": "it-IT-ElsaNeural",
    "Diego (Male, IT)": "it-IT-DiegoNeural",
    # Portuguese
    "Francisca (Female, PT)": "pt-PT-FranciscaNeural",
    "Duarte (Male, PT)": "pt-PT-DuarteNeural",
    "Yara (Female, BR)": "pt-BR-YaraNeural",
    "Antonio (Male, BR)": "pt-BR-AntonioNeural",
    # Japanese
    "Nanami (Female, JP)": "ja-JP-NanamiNeural",
    "Keita (Male, JP)": "ja-JP-KeitaNeural",
    # Chinese
    "Xiaoxiao (Female, CN)": "zh-CN-XiaoxiaoNeural",
    "Yunyang (Male, CN)": "zh-CN-YunyangNeural",
    "HsiaoChen (Female, TW)": "zh-TW-HsiaoChenNeural",
    "YunJhe (Male, TW)": "zh-TW-YunJheNeural",
    # Korean
    "SunHi (Female, KR)": "ko-KR-SunHiNeural",
    "InJoon (Male, KR)": "ko-KR-InJoonNeural",
    # Russian
    "Svetlana (Female, RU)": "ru-RU-SvetlanaNeural",
    "Dmitry (Male, RU)": "ru-RU-DmitryNeural",
    # Arabic
    "Salma (Female, EG)": "ar-EG-SalmaNeural",
    "Shakir (Male, EG)": "ar-EG-ShakirNeural",
    "Hamed (Male, SA)": "ar-SA-HamedNeural",
    # Hindi
    "Swara (Female, IN)": "hi-IN-SwaraNeural",
    "Madhur (Male, IN)": "hi-IN-MadhurNeural",
    # Others (Belgian French, Dutch)
    "Brigitte (Female, BE)": "fr-BE-BrigitteNeural",
    "Gerard (Male, BE)": "fr-BE-GerardNeural",
    "Finn (Male, NL)": "nl-NL-FinnNeural",
    "Maarten (Male, NL)": "nl-NL-MaartenNeural",
    "Sofie (Female, NL)": "nl-NL-SofieNeural",
}
def text_to_speech(text: str, voice: str) -> tuple:
    """Run the async TTS generator and compute the download-button update.

    Args:
        text: Text to synthesize; blank input short-circuits.
        voice: Display label from VOICES; unknown labels fall back to Jenny.

    Returns:
        (audio_path_or_None, update_dict) — the dict toggles the download
        button's visibility.
    """
    if not text or not voice:
        return None, {"visible": False}
    # Unknown voice labels fall back to the default US voice.
    voice_id = VOICES.get(voice, VOICES["Jenny (Female, US)"])
    output_file = asyncio.run(generate_speech(text, voice_id))
    # BUG FIX: the original `return a, X if cond else (None, Y)` applied the
    # conditional only to the *second* tuple element, so the failure branch
    # returned (None, (None, {...})) — a nested tuple instead of two outputs.
    if output_file:
        # NOTE(review): consider also including "value": output_file so the
        # DownloadButton has a file to serve — confirm against Gradio version.
        return output_file, {"visible": True}
    return None, {"visible": False}
# --- Gradio UI: declarative layout + event wiring ---
with gr.Blocks(title="Multi-Voice Text-to-Speech", theme="soft") as demo:
    gr.Markdown("""
# 🎤 Advanced Text-to-Speech Converter
### With 100+ Voices
""")
    with gr.Row():
        with gr.Column():
            # Input side: text area, voice picker, and trigger button.
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type or paste your text here...",
                lines=5,
                max_lines=10
            )
            voice_dropdown = gr.Dropdown(
                choices=list(VOICES.keys()),
                label="Select Voice",
                value="Jenny (Female, US)"
            )
            generate_btn = gr.Button("Generate Speech", variant="primary")
        with gr.Column():
            # Output side: autoplaying audio preview plus a download button
            # that starts hidden and is revealed once audio exists.
            audio_output = gr.Audio(label="Generated Speech", autoplay=True)
            download_btn = gr.DownloadButton(
                label="Download Audio",
                visible=False
            )
    # Interactive components
    # Clicking Generate runs text_to_speech; its tuple fills the audio player
    # and the download button's update dict.
    generate_btn.click(
        fn=text_to_speech,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, download_btn]
    )
    # Keep the download button's visibility in sync with whether audio exists.
    # NOTE(review): this update dict never sets "value", so the button may have
    # no file to serve — confirm against the Gradio version in use.
    audio_output.change(
        fn=lambda x: {"visible": bool(x)},
        inputs=audio_output,
        outputs=download_btn
    )
if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link; drop it for local-only use.
    demo.launch(share=True)