# Page header captured with this file (not part of the program):
# luganda-TTS / app.py
# sulaimank's picture
# Update app.py
# d64991f verified
import os
import tempfile
import gradio as gr
from huggingface_hub import hf_hub_download
from TTS.utils.synthesizer import Synthesizer
# Configuration
# Inputs longer than this many characters are cut before synthesis.
MAX_TXT_LEN = 400
# Hugging Face Hub repository the config and checkpoints are downloaded from.
HF_REPO = "sulaimank/luganda_LMs"

# Model mappings: UI label -> checkpoint filename inside HF_REPO.
# The five checkpoints are 5,000 steps apart, from 2,080,000 to 2,100,000.
MODEL_INFO = {
    f"Model {number}": f"checkpoint_{step}.pth"
    for number, step in enumerate(range(2_080_000, 2_100_001, 5_000), start=1)
}

# Cache for loaded synthesizers, keyed by the UI label used in MODEL_INFO.
synthesizer_cache = dict()
# Local path of the downloaded config.json; None until get_config() fetches it.
config_path = None
def get_config():
    """Return the local path of ``config.json``, downloading it on first use.

    The path is remembered in the module-level ``config_path`` so that
    ``hf_hub_download`` is only called while that variable is still ``None``.
    """
    global config_path
    if config_path is not None:
        return config_path
    config_path = hf_hub_download(HF_REPO, filename="config.json")
    return config_path
def load_synth(model_choice: str):
    """Return the synthesizer for ``model_choice``, creating it on first use.

    Args:
        model_choice: A key of ``MODEL_INFO`` (the label shown in the UI).

    Returns:
        The ``Synthesizer`` stored in ``synthesizer_cache`` for that label.

    Raises:
        KeyError: If ``model_choice`` is not a key of ``MODEL_INFO``.
    """
    try:
        return synthesizer_cache[model_choice]
    except KeyError:
        pass  # Not built yet; fall through and create it.

    checkpoint_name = MODEL_INFO[model_choice]
    checkpoint_path = hf_hub_download(HF_REPO, filename=checkpoint_name)
    synth = Synthesizer(
        tts_checkpoint=checkpoint_path,
        tts_config_path=get_config(),
    )
    synthesizer_cache[model_choice] = synth
    return synth
def generate_speech(text: str, model_choice: str):
    """Synthesize ``text`` with the selected model and return a WAV file path.

    Args:
        text: The text to speak. ``None``, empty and whitespace-only input
            produce no audio.
        model_choice: A key of ``MODEL_INFO`` selecting the checkpoint.

    Returns:
        The path of a newly created temporary ``.wav`` file, or ``None`` when
        there is nothing to synthesize or synthesis failed. Failures are
        printed rather than raised.
    """
    # Guard against None as well as blank input so .strip() cannot raise.
    if not text or not text.strip():
        return None

    # Truncate if too long (slicing is a no-op for shorter text).
    text = text[:MAX_TXT_LEN]

    wav_path = None
    try:
        synthesizer = load_synth(model_choice)
        wav = synthesizer.tts(text)

        # Only reserve a unique file name here. The handle is closed before
        # save_wav() opens the path itself, because a NamedTemporaryFile that
        # is still open cannot be opened a second time on every platform.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            wav_path = fp.name
        synthesizer.save_wav(wav, wav_path)
        return wav_path
    except Exception as e:
        print(f"Error generating speech: {e}")
        # delete=False means nobody else removes the file, so do not leave an
        # empty or partial one behind when synthesis or saving failed.
        if wav_path is not None:
            try:
                os.remove(wav_path)
            except OSError:
                pass
        return None
# Example texts
# Each row is [text, model label], in the order of the
# inputs=[text_input, model_choice] list handed to gr.Examples.
examples = [
    [sentence, f"Model {model_number}"]
    for sentence, model_number in (
        ("Nalubaale y'ennyanja esinga obunene mu Uganda.", 1),
        ("Abantu bangi tebamanyi kuwandika bulungi Luganda.", 3),
        ("Kampala kye kibuga kya Uganda ekikulu.", 5),
    )
]
# Custom CSS for modern look
# Passed to gr.Blocks(css=custom_css). The selectors pair up with the UI code:
#   #title, #subtitle  -> ids of the two <div>s in the gr.HTML header
#   .main-container    -> elem_classes of the outer gr.Column
#   .input-section     -> elem_classes of the inner gr.Column
#   .generate-btn      -> elem_classes of the "Generate Speech" button
# NOTE(review): nothing in this file creates an element with id "root";
# presumably it targets a wrapper that Gradio renders - confirm it still matches.
custom_css = """
#title {
text-align: center;
background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
font-size: 3rem;
font-weight: 800;
margin-bottom: 0.5rem;
}
#subtitle {
text-align: center;
color: #64748b;
font-size: 1.1rem;
margin-bottom: 2rem;
}
.main-container {
max-width: 1400px;
margin: 0 auto;
padding: 2rem 1rem;
width: 95%;
}
.input-section {
background: white;
border-radius: 16px;
padding: 2.5rem;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
border: 1px solid rgba(255, 255, 255, 0.2);
backdrop-filter: blur(10px);
width: 100%;
}
.generate-btn {
background: linear-gradient(45deg, #667eea, #764ba2) !important;
border: none !important;
border-radius: 12px !important;
padding: 0.75rem 2rem !important;
font-weight: 600 !important;
font-size: 1.1rem !important;
transition: all 0.3s ease !important;
}
.generate-btn:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px rgba(102, 126, 234, 0.3) !important;
}
#root {
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
min-height: 100vh;
}
"""
# Create the Gradio interface
# NOTE(review): the nesting of the layout containers below is a best-effort
# reading of the section comments; confirm that the audio player and the
# examples are meant to sit inside the "input-section" card.
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="slate",
    ),
    title="Luganda TTS",
) as demo:
    # Header: the ids are styled by the #title / #subtitle rules in custom_css.
    gr.HTML(
        "\n"
        '<div id="title">🗣️ Luganda TTS 🗣️</div>\n'
        '<div id="subtitle">Transform text into Luganda speech</div>\n'
    )

    # Main container
    with gr.Column(elem_classes=["main-container"]):
        with gr.Column(elem_classes=["input-section"]):
            # Input text
            text_input = gr.Textbox(
                label="Enter Luganda Text",
                placeholder="Wandika wano ekigambo mu Luganda...",
                value="Gyebaleko ssebo.",
                lines=5,
                max_lines=8,
            )

            # Model selection and generate button in a row
            with gr.Row():
                model_choice = gr.Radio(
                    label="Select Model",
                    choices=list(MODEL_INFO),
                    value="Model 1",
                    interactive=True,
                )
                with gr.Column():
                    generate_btn = gr.Button(
                        "Generate Speech",
                        variant="primary",
                        elem_classes=["generate-btn"],
                        size="lg",
                    )

            # Audio output: generate_speech returns a file path, hence "filepath".
            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False,
            )

            # Examples
            gr.Examples(
                examples=examples,
                inputs=[text_input, model_choice],
                outputs=audio_output,
                fn=generate_speech,
                cache_examples=False,
                label="Try these examples",
            )

    # Event handlers: the button click and the textbox submit event both run
    # the same synthesis call with the same inputs and output.
    for trigger in (generate_btn.click, text_input.submit):
        trigger(
            fn=generate_speech,
            inputs=[text_input, model_choice],
            outputs=audio_output,
            show_progress=True,
        )
if __name__ == "__main__":
    # Start the app only when this file is run as a script.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)