Spaces:

sulaimank
/

luganda-TTS

Sleeping

App Files Files Community

sulaimank committed 19 days ago

Commit

f6ff058

verified ·

1 Parent(s): 4f2ec6d

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -227

app.py CHANGED Viewed

@@ -3,267 +3,114 @@ import tempfile
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from TTS.utils.synthesizer import Synthesizer
-import logging
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 # Max input text length
 MAX_TXT_LEN = 400
-# Map simple names to checkpoint files
 MODEL_INFO = {
     "Model 1": "checkpoint_2080000.pth",
-    "Model 2": "checkpoint_2085000.pth",
     "Model 3": "checkpoint_2090000.pth",
     "Model 4": "checkpoint_2095000.pth",
     "Model 5": "checkpoint_2100000.pth",
 }
-# Cache for loaded synthesizers to avoid reloading
-synthesizer_cache = {}
-def download_config():
-    """Download and cache the config file."""
-    try:
-        config_path = hf_hub_download("sulaimank/luganda_LMs", filename="config.json")
-        logger.info(f"Config downloaded to: {config_path}")
-        return config_path
-    except Exception as e:
-        logger.error(f"Failed to download config: {e}")
-        raise
-# Download config once at startup
-config_path = download_config()
-def load_synth(model_choice: str):
-    """Load synthesizer with caching to improve performance."""
-    if model_choice in synthesizer_cache:
-        logger.info(f"Using cached synthesizer for {model_choice}")
-        return synthesizer_cache[model_choice]
-    try:
-        model_file = MODEL_INFO[model_choice]
-        model_path = hf_hub_download("sulaimank/luganda_LMs", filename=model_file)
-        synthesizer = Synthesizer(tts_checkpoint=model_path, tts_config_path=config_path)
-        # Cache the synthesizer
-        synthesizer_cache[model_choice] = synthesizer
-        logger.info(f"Loaded and cached synthesizer for {model_choice}")
-        return synthesizer
-    except Exception as e:
-        logger.error(f"Failed to load synthesizer for {model_choice}: {e}")
-        raise
 def tts(text: str, model_choice: str):
-    """Generate TTS audio from text."""
-    if not text.strip():
-        return None, "⚠️ Please enter some text to synthesize."
-    # Truncate if too long
-    original_length = len(text)
     if len(text) > MAX_TXT_LEN:
         text = text[:MAX_TXT_LEN]
-        warning_msg = f"⚠️ Input truncated from {original_length} to {MAX_TXT_LEN} characters."
-    else:
-        warning_msg = f"✅ Processing {len(text)} characters."
-    try:
-        logger.info(f"Generating TTS for: '{text[:50]}...' using {model_choice}")
-        synthesizer = load_synth(model_choice)
-        wav = synthesizer.tts(text)
-        # Save to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-            synthesizer.save_wav(wav, fp.name)
-            logger.info(f"Audio saved to: {fp.name}")
-            return fp.name, warning_msg
-    except Exception as e:
-        error_msg = f"❌ Error generating speech: {str(e)}"
-        logger.error(error_msg)
-        return None, error_msg
 # Example sentences
 examples = [
     ["Nalubaale y'ennyanja esinga obunene mu Uganda.", "Model 1"],
-    ["Abantu bangi tebamnyi kuwandika bulungi Luganda.", "Model 3"],
     ["Kampala kye kibuga kya Uganda ekikulu.", "Model 5"],
-    ["Webale nnyingi olw'obuyambi bwo.", "Model 2"],
-    ["Enkya tugenda okusoma ebitabo ebipya.", "Model 4"],
 ]
-# Custom CSS for better styling and centering
-custom_css = """
-/* Main container centering */
-.gradio-container {
-    max-width: 1400px !important;
-    margin: 0 auto !important;
-    padding: 10px !important;
-}
-/* Content wrapper */
-.main-content {
-    max-width: 1400px;
-    margin: 0 auto;
-    padding: 0 10px;
-}
-/* Heading center */
-.header {
-    text-align: center;
-    padding: 2em 0;
-}
-.header h1 {
-    font-size: 2.5em;
-    margin-bottom: 0.3em;
-}
-.header p {
-    font-size: 1.2em;
-    margin: 0.2em 0;
-}
-/* Make text, radio, and audio boxes wider */
-.input-section textarea,
-.input-section .wrap,
-.audio-section audio {
-    width: 100% !important;
-    min-width: 700px;
-}
-/* Radio group horizontal and wide */
-.radio-group {
-    display: flex;
-    flex-direction: row;
-    justify-content: space-between;
-    flex-wrap: wrap;
-    gap: 15px;
-}
-.radio-group label {
-    flex: 1;
-    min-width: 150px;
-    padding: 12px 20px !important;
-    border-radius: 20px !important;
-    text-align: center;
-}
-/* Bigger button */
-.generate-btn {
-    margin: 25px auto;
-    display: block;
-    min-width: 250px;
-    font-size: 1.1em;
-}
-"""
-with gr.Blocks(
-    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"),
-    css=custom_css,
-    title="Luganda TTS"
-) as demo:
-    with gr.Column(elem_classes=["main-content"]):
-        # Header
-        gr.Markdown(
-            """
-            <div class="header" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); margin: -20px -20px 30px -20px; border-radius: 0 0 20px 20px; color: white;">
-                <h1>🗣️ Luganda TTS 🇺🇬</h1>
-                <p>Convert text into natural Luganda speech using fine-tuned neural models</p>
-                <p style="font-size: 0.9em; opacity: 0.9;">Choose from 5 different model checkpoints trained on Luganda data</p>
-            </div>
-            """
-        )
-        with gr.Row(equal_height=False):
-            with gr.Column(scale=5, elem_classes=["input-section"]):
-                text_input = gr.Textbox(
-                    label=f"📝 Enter Luganda Text (max {MAX_TXT_LEN} characters)",
-                    placeholder="Wandika wano ekigambo mu Luganda...",
-                    value="Gyebale ko ssebo.",
-                    lines=4,
-                    max_lines=6,
-                )
-                gr.Markdown(
-                    "<h3 style='text-align: center; margin: 20px 0 10px 0; color: #4c1d95;'>🎛️ Model Selection</h3>"
-                )
-                model_choice = gr.Radio(
-                    label="Choose TTS Model",
-                    choices=list(MODEL_INFO.keys()),
-                    value="Model 3",
-                    interactive=True,
-                    elem_classes=["radio-group"]
-                )
-                run_btn = gr.Button(
-                    "🔊 Generate Speech",
-                    variant="primary",
-                    size="lg",
-                    elem_classes=["generate-btn"]
-                )
-            with gr.Column(scale=4, elem_classes=["audio-section"]):
-                gr.Markdown(
-                    "<h3 style='text-align: center; margin: 0 0 15px 0; color: #4c1d95;'>🎵 Generated Audio</h3>"
-                )
-                audio_output = gr.Audio(
-                    label="Generated Speech",
-                    type="filepath",
-                    show_download_button=True
-                )
-                status_output = gr.Textbox(
-                    label="Status",
-                    interactive=False,
-                    show_label=False,
-                    container=False,
-                )
-        # Examples section
-        with gr.Column():
-            gr.Markdown("<div style='text-align:center; font-size:1.1em; font-weight:600;'>💡 Try these Luganda examples:</div>")
-            gr.Examples(
-                examples=examples,
-                inputs=[text_input, model_choice],
-                outputs=[audio_output, status_output],
-                fn=tts,
-                cache_examples=False,
-                label=""
-            )
-    # Connect the generate button
-    run_btn.click(
-        fn=tts,
         inputs=[text_input, model_choice],
-        outputs=[audio_output, status_output]
     )
     # Footer
     # gr.Markdown(
     #     """
-    #     <div style="margin-top: 40px; padding: 20px; text-align: center; border-top: 1px solid #e1e5e9;">
-    #         <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 25px; border-radius: 15px; color: white;">
-    #             <h3 style="margin: 0 0 10px 0; font-size: 1.3em;">🚀 Technical Details</h3>
-    #             <p style="margin: 5px 0; font-size: 1.1em;">
-    #                 # <strong>Powered by:</strong> Coqui TTS Framework<br>
-    #                 # <strong>Models:</strong> Fine-tuned on Luganda speech data<br>
-    #                 <strong>Hosting:</strong> Hugging Face Spaces
-    #             </p>
-    #         </div>
-    #         <div style="margin-top: 20px; padding: 15px; background-color: #f8f9ff; border-radius: 10px; border: 1px solid #e1e5e9;">
-    #             <p style="margin: 0; font-size: 0.95em; color: #6b7280;">
-    #                 💡 <strong>Tips:</strong> Use proper Luganda spelling, punctuation, and avoid mixing languages
-    #             </p>
-    #         </div>
     #     </div>
     #     """
     # )
 if __name__ == "__main__":
-    demo.launch(
-        share=False,
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

 import gradio as gr
 from huggingface_hub import hf_hub_download
 from TTS.utils.synthesizer import Synthesizer
 # Max input text length
+# (tts() truncates anything longer than this many characters)
 MAX_TXT_LEN = 400
+# Map simple names (Model 1, Model 2...) to checkpoint files
+# The keys are also the choices offered by the model selector in the UI.
 MODEL_INFO = {
     "Model 1": "checkpoint_2080000.pth",
+    "Model 2": "checkpoint_2085000.pth",
     "Model 3": "checkpoint_2090000.pth",
     "Model 4": "checkpoint_2095000.pth",
     "Model 5": "checkpoint_2100000.pth",
 }
+# Download config once
+# This runs at import time; load_synth() passes the resulting path to every
+# Synthesizer it builds.
+config_path = hf_hub_download("sulaimank/luganda_LMs", filename="config.json")
+def load_synth(model_file):
+    """Download and initialize the chosen synthesizer"""
+    model_path = hf_hub_download("sulaimank/luganda_LMs", filename=model_file)
+    return Synthesizer(
+        tts_checkpoint=model_path,
+        tts_config_path=config_path
+    )
 def tts(text: str, model_choice: str):
     if len(text) > MAX_TXT_LEN:
         text = text[:MAX_TXT_LEN]
+        print(f"⚠️ Input truncated to {MAX_TXT_LEN} characters.")
+    synthesizer = load_synth(MODEL_INFO[model_choice])
+    wav = synthesizer.tts(text)
+    # Save temp wav file for playback
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        synthesizer.save_wav(wav, fp.name)
+        return fp.name
 # Example sentences
 examples = [
     ["Nalubaale y'ennyanja esinga obunene mu Uganda.", "Model 1"],
+    ["Abantu bangi tebamanyi kuwandika bulungi Luganda.", "Model 3"],
     ["Kampala kye kibuga kya Uganda ekikulu.", "Model 5"],
 ]
+# Modern Gradio Blocks UI
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue")) as demo:
+    # Header
+    gr.Markdown(
+        """
+        <div style="text-align: center; padding: 1em 0;">
+            <h1>🗣️ Luganda TTS 🗣️</h1>
+            <p style="font-size: 1.2em;">
+                Convert text into natural Luganda speech with fine-tuned neural TTS models.<br>
+                Select a model below and type some Luganda text.
+            </p>
+        </div>
+        """
+    )
+    # Centered card container
+    gr.Markdown(
+        """
+        <div style='max-width: 900px; margin: auto; padding: 20px; background: #ffffff;
+                    border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1);'>
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=3):
+            text_input = gr.Textbox(
+                label="Enter Luganda Text",
+                placeholder="Wandika wano ekigambo mu Luganda...",
+                value="Gyebale ko ssebo.",
+                lines=3,
+            )
+            model_choice = gr.Radio(
+                label="Choose Model",
+                choices=list(MODEL_INFO.keys()),
+                value="Model 1",
+                interactive=True,
+            )
+            run_btn = gr.Button("🔊 Generate Speech", variant="primary")
+        with gr.Column(scale=2):
+            audio_output = gr.Audio(label="Generated Speech", type="filepath")
+    gr.Examples(
+        examples=examples,
         inputs=[text_input, model_choice],
+        outputs=[audio_output],
+        fn=tts,
+        cache_examples=False,
     )
+    run_btn.click(fn=tts, inputs=[text_input, model_choice], outputs=audio_output)
+    # Close card
+    gr.Markdown("</div>")
     # Footer
     # gr.Markdown(
     #     """
+    #     ---
+    #     <div style="text-align: center; font-size: 0.9em; color: gray;">
+    #         🚀 Developed with <b>Coqui TTS</b> · Hosted on <b>Hugging Face Spaces</b>
     #     </div>
     #     """
     # )
 if __name__ == "__main__":
+    demo.launch()