"""Gradio demo that turns Luganda text into speech.

Model checkpoints and the shared config file are fetched from the
Hugging Face Hub repository named by ``HF_REPO`` and wrapped in a
``TTS.utils.synthesizer.Synthesizer``.
"""

import os
import tempfile
from typing import Optional

import gradio as gr
from huggingface_hub import hf_hub_download
from TTS.utils.synthesizer import Synthesizer

# Configuration
MAX_TXT_LEN = 400  # input longer than this many characters is truncated
HF_REPO = "sulaimank/luganda_LMs"

# Model mappings: label shown in the UI -> checkpoint filename in HF_REPO
MODEL_INFO = {
    "Model 1": "checkpoint_2080000.pth",
    "Model 2": "checkpoint_2085000.pth",
    "Model 3": "checkpoint_2090000.pth",
    "Model 4": "checkpoint_2095000.pth",
    "Model 5": "checkpoint_2100000.pth",
}

# Cache for loaded synthesizers, keyed by the UI label
synthesizer_cache = {}
# Local path of the downloaded config.json; filled in lazily by get_config()
config_path = None


def get_config() -> str:
    """Return the local path of ``config.json``, downloading it on first use."""
    global config_path
    if config_path is None:
        config_path = hf_hub_download(HF_REPO, filename="config.json")
    return config_path


def load_synth(model_choice: str) -> Synthesizer:
    """Return the synthesizer for ``model_choice``, loading it on first use.

    Args:
        model_choice: One of the keys of ``MODEL_INFO``.

    Returns:
        The ``Synthesizer`` stored in ``synthesizer_cache`` for that key.

    Raises:
        KeyError: If ``model_choice`` is not a key of ``MODEL_INFO``.
    """
    if model_choice not in synthesizer_cache:
        model_file = MODEL_INFO[model_choice]
        model_path = hf_hub_download(HF_REPO, filename=model_file)
        synthesizer_cache[model_choice] = Synthesizer(
            tts_checkpoint=model_path,
            tts_config_path=get_config(),
        )
    return synthesizer_cache[model_choice]


def generate_speech(text: str, model_choice: str) -> Optional[str]:
    """Synthesize ``text`` with the chosen model and return a WAV file path.

    Args:
        text: Text to speak. Only the first ``MAX_TXT_LEN`` characters are
            used.
        model_choice: One of the keys of ``MODEL_INFO``.

    Returns:
        Path of a temporary ``.wav`` file, or ``None`` when ``text`` is
        missing/blank or when synthesis fails (the error is printed).
    """
    # ``not text`` also covers None, which has no .strip().
    if not text or not text.strip():
        return None

    # Truncate if too long (slicing a shorter string is a no-op).
    text = text[:MAX_TXT_LEN]

    wav_path = None
    try:
        synthesizer = load_synth(model_choice)
        wav = synthesizer.tts(text)

        # Reserve a unique file name, then close our handle before the
        # synthesizer writes to that path: some platforms refuse to open
        # a file a second time while the first handle is still open.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            wav_path = fp.name
        synthesizer.save_wav(wav, wav_path)
        return wav_path
    except Exception as e:
        # Best effort: report the failure and hand the UI an empty result.
        print(f"Error generating speech: {e}")
        if wav_path is not None:
            # delete=False means nothing else will remove a file that was
            # created but never successfully written.
            try:
                os.remove(wav_path)
            except OSError:
                pass
        return None


# Example texts: [text, model label] pairs offered in the UI
examples = [
    ["Nalubaale y'ennyanja esinga obunene mu Uganda.", "Model 1"],
    ["Abantu bangi tebamanyi kuwandika bulungi Luganda.", "Model 3"],
    ["Kampala kye kibuga kya Uganda ekikulu.", "Model 5"],
]

# Custom CSS for modern look
custom_css = """
#title {
    text-align: center;
    background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 3rem;
    font-weight: 800;
    margin-bottom: 0.5rem;
}
#subtitle {
    text-align: center;
    color: #64748b;
    font-size: 1.1rem;
    margin-bottom: 2rem;
}
.main-container {
    max-width: 1400px;
    margin: 0 auto;
    padding: 2rem 1rem;
    width: 95%;
}
.input-section {
    background: white;
    border-radius: 16px;
    padding: 2.5rem;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
    border: 1px solid rgba(255, 255, 255, 0.2);
    backdrop-filter: blur(10px);
    width: 100%;
}
.generate-btn {
    background: linear-gradient(45deg, #667eea, #764ba2) !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 0.75rem 2rem !important;
    font-weight: 600 !important;
    font-size: 1.1rem !important;
    transition: all 0.3s ease !important;
}
.generate-btn:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 25px rgba(102, 126, 234, 0.3) !important;
}
#root {
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    min-height: 100vh;
}
"""

# Create the Gradio interface
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="slate",
    ),
    title="Luganda TTS",
) as demo:
    # Header. The ids are what the #title / #subtitle rules in custom_css
    # select; without them those rules match nothing.
    gr.HTML("""
        <h1 id="title">🗣️ Luganda TTS 🗣️</h1>
        <p id="subtitle">Transform text into Luganda speech</p>
    """)

    # Main container
    with gr.Column(elem_classes=["main-container"]):
        with gr.Column(elem_classes=["input-section"]):
            # Input text
            text_input = gr.Textbox(
                label="Enter Luganda Text",
                placeholder="Wandika wano ekigambo mu Luganda...",
                value="Gyebaleko ssebo.",
                lines=5,
                max_lines=8,
            )

            # Model selection and generate button in a row
            with gr.Row():
                model_choice = gr.Radio(
                    label="Select Model",
                    choices=list(MODEL_INFO.keys()),
                    value="Model 1",
                    interactive=True,
                )
                with gr.Column():
                    generate_btn = gr.Button(
                        "Generate Speech",
                        variant="primary",
                        elem_classes=["generate-btn"],
                        size="lg",
                    )

            # Audio output
            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False,
            )

            # Examples
            gr.Examples(
                examples=examples,
                inputs=[text_input, model_choice],
                outputs=audio_output,
                fn=generate_speech,
                cache_examples=False,
                label="Try these examples",
            )

    # Event handlers: the button and pressing Enter in the textbox both
    # run the same synthesis function.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, model_choice],
        outputs=audio_output,
        show_progress=True,
    )
    text_input.submit(
        fn=generate_speech,
        inputs=[text_input, model_choice],
        outputs=audio_output,
        show_progress=True,
    )

if __name__ == "__main__":
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )