# luganda-TTS / app.py
# sulaimank's picture
# Update app.py
# 5dab9a1 verified
# raw
# history blame
# 9.39 kB
import os
import tempfile
import gradio as gr
from huggingface_hub import hf_hub_download
from TTS.utils.synthesizer import Synthesizer
import logging
# Upper bound on input length, in characters; tts() cuts longer text down
# to this size before synthesis.
MAX_TXT_LEN = 400

# UI-facing model label -> checkpoint filename fetched by load_synth().
MODEL_INFO = {
    "Model 1": "checkpoint_2080000.pth",
    "Model 2": "checkpoint_2085000.pth",
    "Model 3": "checkpoint_2090000.pth",
    "Model 4": "checkpoint_2095000.pth",
    "Model 5": "checkpoint_2100000.pth",
}

# Already-built synthesizers keyed by model label, so each checkpoint is
# only loaded the first time it is requested.
synthesizer_cache = {}

# Module-wide logging at INFO level.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def download_config():
    """Fetch ``config.json`` from the ``sulaimank/luganda_LMs`` Hub repo.

    Returns:
        The local path returned by ``hf_hub_download``.

    Raises:
        Exception: whatever ``hf_hub_download`` raised; the failure is
            logged first and then re-raised unchanged.
    """
    try:
        local_path = hf_hub_download("sulaimank/luganda_LMs", filename="config.json")
    except Exception as e:
        logger.error(f"Failed to download config: {e}")
        raise
    else:
        logger.info(f"Config downloaded to: {local_path}")
        return local_path
# Resolve the shared model config once, at import time. load_synth() passes
# this same path to every Synthesizer it builds, so a download failure here
# aborts startup (download_config() re-raises).
config_path = download_config()
def load_synth(model_choice: str):
    """Return the synthesizer for ``model_choice``, building it on first use.

    On a cache miss the checkpoint named in ``MODEL_INFO`` is downloaded
    from the ``sulaimank/luganda_LMs`` repo, wrapped in a ``Synthesizer``
    together with the module-level ``config_path``, and stored in
    ``synthesizer_cache``. Later calls with the same label reuse that object.

    Args:
        model_choice: A key of ``MODEL_INFO`` (e.g. ``"Model 3"``).

    Returns:
        The cached or newly created ``Synthesizer``.

    Raises:
        Exception: any error raised while resolving, downloading or loading
            the checkpoint; it is logged and re-raised unchanged.
    """
    if model_choice not in synthesizer_cache:
        try:
            checkpoint_path = hf_hub_download(
                "sulaimank/luganda_LMs", filename=MODEL_INFO[model_choice]
            )
            built = Synthesizer(
                tts_checkpoint=checkpoint_path, tts_config_path=config_path
            )
            # Remember the instance so the next request skips the load.
            synthesizer_cache[model_choice] = built
            logger.info(f"Loaded and cached synthesizer for {model_choice}")
            return built
        except Exception as e:
            logger.error(f"Failed to load synthesizer for {model_choice}: {e}")
            raise

    logger.info(f"Using cached synthesizer for {model_choice}")
    return synthesizer_cache[model_choice]
def tts(text: str, model_choice: str):
    """Synthesize ``text`` with the selected model and save it as a WAV file.

    Args:
        text: Text to speak. ``None``, empty or whitespace-only input is
            rejected with a status message instead of raising. Input longer
            than ``MAX_TXT_LEN`` characters is truncated to that length.
        model_choice: Model label, forwarded to ``load_synth``.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
        of a temporary ``.wav`` file (created with ``delete=False`` so it is
        still on disk after this function returns), or ``None`` when no
        audio was produced. Errors during synthesis are reported through
        ``status_message`` rather than raised.
    """
    # `not text` also covers None, which would otherwise crash on .strip().
    if not text or not text.strip():
        return None, "⚠️ Please enter some text to synthesize."

    # Truncate if too long
    original_length = len(text)
    if original_length > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        warning_msg = f"⚠️ Input truncated from {original_length} to {MAX_TXT_LEN} characters."
    else:
        warning_msg = f"✅ Processing {original_length} characters."

    wav_path = None
    try:
        logger.info(f"Generating TTS for: '{text[:50]}...' using {model_choice}")
        synthesizer = load_synth(model_choice)
        wav = synthesizer.tts(text)

        # Only reserve a unique path here and close the handle right away:
        # re-opening a NamedTemporaryFile by name while it is still open is
        # not portable across platforms.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            wav_path = fp.name
        synthesizer.save_wav(wav, wav_path)
        logger.info(f"Audio saved to: {wav_path}")
        return wav_path, warning_msg
    except Exception as e:
        error_msg = f"❌ Error generating speech: {str(e)}"
        # logger.exception keeps the traceback alongside the message.
        logger.exception(error_msg)
        # Don't leave an empty/partial temp file behind on failure.
        if wav_path is not None:
            try:
                os.remove(wav_path)
            except OSError:
                pass  # best-effort cleanup only
        return None, error_msg
# Sample inputs for the gr.Examples widget: each row is
# [text, model label] and fills (text_input, model_choice).
# NOTE(review): "tebamnyi" in the second row looks like a typo for
# "tebamanyi" — confirm with a Luganda speaker before changing it.
examples = [
    ["Nalubaale y'ennyanja esinga obunene mu Uganda.", "Model 1"],
    ["Abantu bangi tebamnyi kuwandika bulungi Luganda.", "Model 3"],
    ["Kampala kye kibuga kya Uganda ekikulu.", "Model 5"],
    ["Webale nnyingi olw'obuyambi bwo.", "Model 2"],
    ["Enkya tugenda okusoma ebitabo ebipya.", "Model 4"],
]
# Extra stylesheet passed to gr.Blocks(css=...).
# Most selectors below match class names this file attaches itself, either
# through a component's elem_classes (main-content, input-section,
# radio-group, generate-btn, audio-section, status-message,
# examples-section) or through inline HTML (examples-title, footer).
# `.gradio-container` is not assigned anywhere in this file — presumably it
# is Gradio's own root container class; verify against the Gradio version
# in use.
custom_css = """
/* Main container centering */
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
padding: 20px !important;
}
/* Center all content */
.main-content {
max-width: 1000px;
margin: 0 auto;
padding: 0 20px;
}
/* Status message styling */
.status-message {
padding: 12px;
border-radius: 8px;
margin: 10px 0;
text-align: center;
font-weight: 500;
}
/* Center radio buttons */
.radio-group {
display: flex;
flex-direction: column;
align-items: center;
}
/* Better button styling */
.generate-btn {
margin: 20px auto;
display: block;
min-width: 200px;
}
/* Examples section */
.examples-section {
margin: 30px 0;
padding: 20px;
background-color: #fafbfc;
border-radius: 12px;
border: 1px solid #e1e5e9;
}
/* Center examples title */
.examples-title {
text-align: center;
font-size: 1.1em;
font-weight: 600;
margin-bottom: 15px;
color: #374151;
}
/* Footer styling */
.footer {
margin-top: 40px;
padding: 20px;
text-align: center;
border-top: 1px solid #e1e5e9;
}
/* Input components centering */
.input-section {
padding: 20px 0;
}
/* Audio output centering */
.audio-section {
display: flex;
flex-direction: column;
align-items: center;
padding: 20px;
}
"""
# Gradio UI: header banner, input column (text, model picker, button),
# output column (audio + status line), clickable examples, and a footer.
#
# The emoji in the labels/HTML below were mojibake in the previous revision
# (UTF-8 bytes decoded through a single-byte codepage) and have been
# restored to the intended characters.
#
# NOTE(review): the previous revision had lost all indentation; the nesting
# below is reconstructed from the component order and the CSS class names —
# confirm it matches the intended layout.
#
# The multi-line HTML strings are deliberately kept flush-left so that their
# content is unchanged apart from the repaired emoji.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"),
    css=custom_css,
    title="Luganda TTS",
) as demo:
    with gr.Column(elem_classes=["main-content"]):
        # Header
        gr.Markdown(
            """
<div style="text-align: center; padding: 2em 0; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); margin: -20px -20px 30px -20px; border-radius: 0 0 20px 20px; color: white;">
<h1 style="margin: 0; font-size: 2.5em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">🗣️ Luganda TTS 🇺🇬</h1>
<p style="font-size: 1.3em; margin: 15px 0 0 0; opacity: 0.95;">
Convert text into natural Luganda speech using fine-tuned neural models<br>
<span style="font-size: 0.9em; opacity: 0.8;">Choose from 5 different model checkpoints trained on Luganda data</span>
</p>
</div>
"""
        )

        with gr.Row(equal_height=False):
            # Left: text entry, model picker and the trigger button.
            with gr.Column(scale=3, elem_classes=["input-section"]):
                text_input = gr.Textbox(
                    # NOTE(review): the last byte of this emoji was lost in
                    # the mojibake; 📝 is the best-fit reconstruction — confirm.
                    label=f"📝 Enter Luganda Text (max {MAX_TXT_LEN} characters)",
                    placeholder="Wandika wano ekigambo mu Luganda...",
                    value="Gyebale ko ssebo.",
                    lines=4,
                    max_lines=6,
                )
                gr.Markdown(
                    "<h3 style='text-align: center; margin: 20px 0 10px 0; color: #4c1d95;'>🎛️ Model Selection</h3>"
                )
                model_choice = gr.Radio(
                    label="Choose TTS Model",
                    choices=list(MODEL_INFO.keys()),
                    value="Model 3",
                    interactive=True,
                    elem_classes=["radio-group"]
                )
                run_btn = gr.Button(
                    "🔊 Generate Speech",
                    variant="primary",
                    size="lg",
                    elem_classes=["generate-btn"]
                )

            # Right: generated audio plus the one-line status message.
            with gr.Column(scale=2, elem_classes=["audio-section"]):
                gr.Markdown(
                    "<h3 style='text-align: center; margin: 0 0 15px 0; color: #4c1d95;'>🎵 Generated Audio</h3>"
                )
                audio_output = gr.Audio(
                    label="Generated Speech",
                    type="filepath",
                    show_download_button=True
                )
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    show_label=False,
                    container=False,
                    elem_classes=["status-message"]
                )

        # Examples section
        with gr.Column(elem_classes=["examples-section"]):
            gr.Markdown("<div class='examples-title'>💡 Try these Luganda examples:</div>")
            gr.Examples(
                examples=examples,
                inputs=[text_input, model_choice],
                outputs=[audio_output, status_output],
                fn=tts,
                cache_examples=False,
                label=""
            )

        # Connect the generate button: tts() returns (audio path, status),
        # matching the two outputs below.
        run_btn.click(
            fn=tts,
            inputs=[text_input, model_choice],
            outputs=[audio_output, status_output]
        )

        # Footer
        gr.Markdown(
            """
<div class="footer">
<div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 25px; border-radius: 15px; color: white; text-shadow: 1px 1px 2px rgba(0,0,0,0.3);">
<h3 style="margin: 0 0 10px 0; font-size: 1.3em;">🚀 Technical Details</h3>
<p style="margin: 5px 0; font-size: 1.1em;">
<strong>Powered by:</strong> Coqui TTS Framework<br>
<strong>Models:</strong> Fine-tuned on Luganda speech data<br>
<strong>Hosting:</strong> Hugging Face Spaces
</p>
</div>
<div style="margin-top: 20px; padding: 15px; background-color: #f8f9ff; border-radius: 10px; border: 1px solid #e1e5e9;">
<p style="margin: 0; font-size: 0.95em; color: #6b7280;">
💡 <strong>Tips for best results:</strong> Use proper Luganda spelling, punctuation, and avoid mixing languages
</p>
</div>
</div>
"""
        )
if __name__ == "__main__":
    # Serve on 0.0.0.0:7860 with no share link requested and
    # show_error enabled.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)