# Spaces: Sleeping  (page-header residue captured with the source; not part of the program)
import os | |
import tempfile | |
import gradio as gr | |
from huggingface_hub import hf_hub_download | |
from TTS.utils.synthesizer import Synthesizer | |
import logging | |
# Logging: INFO-level root configuration plus a module-scoped logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Inputs longer than this many characters are cut down before synthesis.
MAX_TXT_LEN = 400

# Numeric suffixes of the checkpoint filenames, in display order.
_CHECKPOINT_STEPS = (2080000, 2085000, 2090000, 2095000, 2100000)

# Display name ("Model 1" .. "Model 5") -> checkpoint filename.
MODEL_INFO = {
    f"Model {position}": f"checkpoint_{step}.pth"
    for position, step in enumerate(_CHECKPOINT_STEPS, start=1)
}

# Synthesizers that have already been built, keyed by display name.
synthesizer_cache = dict()
def download_config():
    """Fetch ``config.json`` from the model repository and return its local path.

    A failing download is logged and then re-raised unchanged.
    """
    try:
        local_path = hf_hub_download("sulaimank/luganda_LMs", filename="config.json")
    except Exception as err:
        logger.error(f"Failed to download config: {err}")
        raise
    else:
        logger.info(f"Config downloaded to: {local_path}")
        return local_path


# Resolved once at import time; load_synth passes this path to every Synthesizer.
config_path = download_config()
def load_synth(model_choice: str):
    """Return the synthesizer for ``model_choice``, building it on first use.

    Args:
        model_choice: Display name of the model; must be a key of
            ``MODEL_INFO``.

    Returns:
        The ``Synthesizer`` built from the matching checkpoint. Repeated calls
        with the same name return the instance kept in ``synthesizer_cache``.

    Raises:
        KeyError: If ``model_choice`` is not a key of ``MODEL_INFO``.
        Exception: Anything raised while downloading the checkpoint or
            constructing the ``Synthesizer``; it is logged with its traceback
            and re-raised unchanged.
    """
    if model_choice in synthesizer_cache:
        logger.info("Using cached synthesizer for %s", model_choice)
        return synthesizer_cache[model_choice]

    # Reject unknown names up front so the caller sees the valid choices
    # instead of a bare KeyError carrying only the bad key.
    if model_choice not in MODEL_INFO:
        valid = ", ".join(MODEL_INFO)
        logger.error("Unknown model %r; expected one of: %s", model_choice, valid)
        raise KeyError(f"Unknown model {model_choice!r}; expected one of: {valid}")

    # Only the two calls that can actually fail sit inside the try block.
    try:
        model_path = hf_hub_download(
            "sulaimank/luganda_LMs", filename=MODEL_INFO[model_choice]
        )
        synthesizer = Synthesizer(
            tts_checkpoint=model_path, tts_config_path=config_path
        )
    except Exception:
        # logger.exception records the traceback along with the message.
        logger.exception("Failed to load synthesizer for %s", model_choice)
        raise

    synthesizer_cache[model_choice] = synthesizer
    logger.info("Loaded and cached synthesizer for %s", model_choice)
    return synthesizer
def tts(text: str, model_choice: str):
    """Synthesize ``text`` with the chosen model and return the audio file path.

    Args:
        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped before synthesis.
        model_choice: Display name of the model, forwarded to ``load_synth``.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path of
        a ``.wav`` temporary file, or ``None`` when the input is empty or
        synthesis fails; ``status_message`` describes what happened.
    """
    # Treat a missing value like blank input instead of failing on None.strip().
    if not text or not text.strip():
        return None, "⚠️ Please enter some text to synthesize."

    # Truncate if too long, and tell the user when that happened.
    original_length = len(text)
    if original_length > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        warning_msg = (
            f"⚠️ Input truncated from {original_length} to {MAX_TXT_LEN} characters."
        )
    else:
        warning_msg = f"✅ Processing {original_length} characters."

    try:
        # Only mark the preview with an ellipsis when it really was shortened.
        preview = text[:50] + ("..." if len(text) > 50 else "")
        logger.info("Generating TTS for: '%s' using %s", preview, model_choice)

        synthesizer = load_synth(model_choice)
        wav = synthesizer.tts(text)

        # Reserve a unique path, then close our handle before save_wav writes
        # to it by name; delete=False keeps the file for the caller.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            wav_path = fp.name
        synthesizer.save_wav(wav, wav_path)

        logger.info("Audio saved to: %s", wav_path)
        return wav_path, warning_msg
    except Exception as e:
        # Report the failure through the status output rather than raising.
        error_msg = f"❌ Error generating speech: {e}"
        logger.exception(error_msg)
        return None, error_msg
# Example rows for the UI: each entry is [text, model display name], in the
# same order as the inputs the examples widget fills in.
examples = [
    ["Nalubaale y'ennyanja esinga obunene mu Uganda.", "Model 1"],
    ["Abantu bangi tebamanyi kuwandika bulungi Luganda.", "Model 3"],
    ["Kampala kye kibuga kya Uganda ekikulu.", "Model 5"],
    ["Webale nnyingi olw'obuyambi bwo.", "Model 2"],
    ["Enkya tugenda okusoma ebitabo ebipya.", "Model 4"],
]
# Custom CSS for better styling and centering.
# Passed to gr.Blocks(css=...). Apart from .gradio-container (presumably
# Gradio's own root element -- TODO confirm), every selector below matches a
# class attached in the UI definition, either through elem_classes or through
# a class attribute in the inline HTML.
custom_css = """
/* Main container centering */
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
    padding: 20px !important;
}
/* Center all content */
.main-content {
    max-width: 1000px;
    margin: 0 auto;
    padding: 0 20px;
}
/* Status message styling */
.status-message {
    padding: 12px;
    border-radius: 8px;
    margin: 10px 0;
    text-align: center;
    font-weight: 500;
}
/* Center radio buttons */
.radio-group {
    display: flex;
    flex-direction: column;
    align-items: center;
}
/* Better button styling */
.generate-btn {
    margin: 20px auto;
    display: block;
    min-width: 200px;
}
/* Examples section */
.examples-section {
    margin: 30px 0;
    padding: 20px;
    background-color: #fafbfc;
    border-radius: 12px;
    border: 1px solid #e1e5e9;
}
/* Center examples title */
.examples-title {
    text-align: center;
    font-size: 1.1em;
    font-weight: 600;
    margin-bottom: 15px;
    color: #374151;
}
/* Footer styling */
.footer {
    margin-top: 40px;
    padding: 20px;
    text-align: center;
    border-top: 1px solid #e1e5e9;
}
/* Input components centering */
.input-section {
    padding: 20px 0;
}
/* Audio output centering */
.audio-section {
    display: flex;
    flex-direction: column;
    align-items: center;
    padding: 20px;
}
"""
# UI definition. Layout: header, a two-column row (text + model picker +
# generate button on the left, audio player + status on the right), an
# examples panel, and a footer. Both the button and the examples call tts()
# with (text_input, model_choice) and write to (audio_output, status_output).
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"),
    css=custom_css,
    title="Luganda TTS"
) as demo:
    with gr.Column(elem_classes=["main-content"]):
        # Header
        gr.Markdown(
            """
            <div style="text-align: center; padding: 2em 0; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); margin: -20px -20px 30px -20px; border-radius: 0 0 20px 20px; color: white;">
            <h1 style="margin: 0; font-size: 2.5em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">🗣️ Luganda TTS 🇺🇬</h1>
            <p style="font-size: 1.3em; margin: 15px 0 0 0; opacity: 0.95;">
            Convert text into natural Luganda speech using fine-tuned neural models<br>
            <span style="font-size: 0.9em; opacity: 0.8;">Choose from 5 different model checkpoints trained on Luganda data</span>
            </p>
            </div>
            """
        )
        with gr.Row(equal_height=False):
            # Left column: text entry, model picker and the trigger button.
            with gr.Column(scale=3, elem_classes=["input-section"]):
                text_input = gr.Textbox(
                    label=f"📝 Enter Luganda Text (max {MAX_TXT_LEN} characters)",
                    placeholder="Wandika wano ekigambo mu Luganda...",
                    value="Gyebale ko ssebo.",
                    lines=4,
                    max_lines=6,
                )
                gr.Markdown(
                    "<h3 style='text-align: center; margin: 20px 0 10px 0; color: #4c1d95;'>🎛️ Model Selection</h3>"
                )
                model_choice = gr.Radio(
                    label="Choose TTS Model",
                    choices=list(MODEL_INFO.keys()),
                    value="Model 3",
                    interactive=True,
                    elem_classes=["radio-group"]
                )
                run_btn = gr.Button(
                    "🔊 Generate Speech",
                    variant="primary",
                    size="lg",
                    elem_classes=["generate-btn"]
                )
            # Right column: generated audio and the status line from tts().
            with gr.Column(scale=2, elem_classes=["audio-section"]):
                gr.Markdown(
                    "<h3 style='text-align: center; margin: 0 0 15px 0; color: #4c1d95;'>🎵 Generated Audio</h3>"
                )
                audio_output = gr.Audio(
                    label="Generated Speech",
                    type="filepath",
                    show_download_button=True
                )
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    show_label=False,
                    container=False,
                    elem_classes=["status-message"]
                )
        # Examples section
        with gr.Column(elem_classes=["examples-section"]):
            gr.Markdown("<div class='examples-title'>💡 Try these Luganda examples:</div>")
            gr.Examples(
                examples=examples,
                inputs=[text_input, model_choice],
                outputs=[audio_output, status_output],
                fn=tts,
                cache_examples=False,
                label=""
            )
        # Connect the generate button
        run_btn.click(
            fn=tts,
            inputs=[text_input, model_choice],
            outputs=[audio_output, status_output]
        )
        # Footer
        gr.Markdown(
            """
            <div class="footer">
            <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 25px; border-radius: 15px; color: white; text-shadow: 1px 1px 2px rgba(0,0,0,0.3);">
            <h3 style="margin: 0 0 10px 0; font-size: 1.3em;">📊 Technical Details</h3>
            <p style="margin: 5px 0; font-size: 1.1em;">
            <strong>Powered by:</strong> Coqui TTS Framework<br>
            <strong>Models:</strong> Fine-tuned on Luganda speech data<br>
            <strong>Hosting:</strong> Hugging Face Spaces
            </p>
            </div>
            <div style="margin-top: 20px; padding: 15px; background-color: #f8f9ff; border-radius: 10px; border: 1px solid #e1e5e9;">
            <p style="margin: 0; font-size: 0.95em; color: #6b7280;">
            💡 <strong>Tips for best results:</strong> Use proper Luganda spelling, punctuation, and avoid mixing languages
            </p>
            </div>
            </div>
            """
        )
if __name__ == "__main__":
    # Launch settings, gathered in one place and handed to demo.launch as-is.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)