Spaces:

MALIBA-AI
/

BambaraText2Speech

Running on Zero

App Files Community

sudoping01 committed 8 days ago

Commit

e347941

verified ·

1 Parent(s): 60bcd81

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -28

app.py CHANGED Viewed

@@ -36,22 +36,33 @@ def get_speakers_dict():
         # Use the correct new structure as shown in your example
         from maliba_ai.config.settings import Speakers
-        # Create dictionary with all 10 speakers using proper syntax
-        available_speakers = {
-            "Bourama": Speakers.Bourama,
-            "Adama": Speakers.Adama,
-            "Moussa": Speakers.Moussa,
-            "Modibo": Speakers.Modibo,
-            "Seydou": Speakers.Seydou,
-            "Amadou": Speakers.Amadou,
-            "Bakary": Speakers.Bakary,
-            "Ngolo": Speakers.Ngolo,
-            "Ibrahima": Speakers.Ibrahima,
-            "Amara": Speakers.Amara
         }
-        logger.info(f"Loaded {len(available_speakers)} speakers from new structure: {list(available_speakers.keys())}")
-        return available_speakers
     except Exception as e:
         logger.error(f"Failed to import from new settings structure: {e}")
@@ -93,7 +104,7 @@ def initialize_model_once():
         start_time = time.time()
         # Use the new import structure from the README
-        from maliba_ai.tts.inference import BambaraTTSInference
         model = BambaraTTSInference()
         speakers = get_speakers_dict()
@@ -222,7 +233,7 @@ def get_speaker_names():
 SPEAKER_NAMES = get_speaker_names()
-# Examples with variety of lengths and speakers matched to their characteristics
 examples = [
     ["Aw ni ce", "Adama"],  # Natural conversational greeting
     ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"],  # Clear pronunciation for informative content
@@ -233,13 +244,33 @@ examples = [
     ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Moussa"],  # Clear pronunciation for heartfelt long message
     ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Bourama"],  # Most stable for complex statement
     ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Modibo"],  # Expressive delivery for personal greeting
-    ["To tɔ nantan ni lafiya, o ka fisa ni so fa dumuniba kɛlɛma ye.", "Amadou"],  # Warm and friendly voice for wisdom saying
-    ["Mali ye jamana ɲuman ye!", "Bakary"],  # Deep, authoritative tone for patriotic statement
-    ["An ka ɲɔgɔn dɛmɛ ka baara kɛ ɲɔgɔn fɛ", "Ngolo"],  # Youthful and energetic for collaboration
-    ["Hakili to yɔrɔ min na, sabali bɛ yen", "Ibrahima"],  # Calm and measured for philosophical thought
-    ["Dɔnko ɲuman ye, a bɛ dɔn mɔgɔ kɔnɔ", "Amara"],  # Melodic and smooth for poetic expression
 ]
 def build_interface():
     """Build the Gradio interface - simplified like your old working version"""
@@ -338,12 +369,17 @@ def build_interface():
             gr.Markdown("**Click any example below:**")
-            for i, (text, speaker) in enumerate(examples):
-                btn = gr.Button(f"{text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
-                btn.click(
-                    fn=lambda t=text, s=speaker: load_example(t, s),
-                    outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
-                )
         with gr.Accordion("About", open=False):
             gr.Markdown(f"""
@@ -406,7 +442,7 @@ def main():
     """Main function to launch the Gradio interface"""
     logger.info("Starting Bambara TTS Gradio interface.")
     interface = build_interface()
     interface.launch(
         server_name="0.0.0.0",

         # Use the correct new structure as shown in your example
         from maliba_ai.config.settings import Speakers
+        # Try to get all 10 speakers, but handle gracefully if some don't exist
+        available_speakers = {}
+        all_speakers = {
+            "Bourama": "Bourama",
+            "Adama": "Adama",
+            "Moussa": "Moussa",
+            "Modibo": "Modibo",
+            "Seydou": "Seydou",
+            "Amadou": "Amadou",
+            "Bakary": "Bakary",
+            "Ngolo": "Ngolo",
+            "Ibrahima": "Ibrahima",
+            "Amara": "Amara"
         }
+        for name, attr_name in all_speakers.items():
+            try:
+                if hasattr(Speakers, attr_name):
+                    available_speakers[name] = getattr(Speakers, attr_name)
+            except:
+                continue
+        if available_speakers:
+            logger.info(f"Loaded {len(available_speakers)} speakers from new structure: {list(available_speakers.keys())}")
+            return available_speakers
+        else:
+            raise AttributeError("No speakers found in new structure")
     except Exception as e:
         logger.error(f"Failed to import from new settings structure: {e}")
         start_time = time.time()
         # Use the new import structure from the README
+        from maliba_ai.tts import BambaraTTSInference
         model = BambaraTTSInference()
         speakers = get_speakers_dict()
 SPEAKER_NAMES = get_speaker_names()
+# Examples with variety of lengths - use only available speakers with fallbacks
 examples = [
     ["Aw ni ce", "Adama"],  # Natural conversational greeting
     ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"],  # Clear pronunciation for informative content
     ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Moussa"],  # Clear pronunciation for heartfelt long message
     ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Bourama"],  # Most stable for complex statement
     ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Modibo"],  # Expressive delivery for personal greeting
 ]
+# Additional examples, added per speaker when more than five speakers are available
+def get_examples_for_available_speakers():
+    """Return the base examples plus extras for optional speakers that are present.
+
+    Starts from a shallow copy of the module-level ``examples`` list, so the
+    ``examples`` list object itself is not extended. Extra ``[text, speaker]``
+    pairs are only considered when ``SPEAKER_NAMES`` holds more than five
+    entries, and each pair is appended only if its speaker name is found in
+    ``SPEAKER_NAMES``.
+
+    Returns:
+        A list of ``[text, speaker]`` pairs: the base examples first, followed
+        by any extras that qualified.
+    """
+    base_examples = examples.copy()
+    # Extras are gated on the speaker count: with five or fewer names in
+    # SPEAKER_NAMES only the base examples are returned.
+    if len(SPEAKER_NAMES) > 5:
+        additional_examples = []
+        # Each optional speaker is checked individually so a missing speaker
+        # never gets an example entry.
+        if "Amadou" in SPEAKER_NAMES:
+            additional_examples.append(["To tɔ nantan ni lafiya, o ka fisa ni so fa dumuniba kɛlɛma ye.", "Amadou"])
+        if "Bakary" in SPEAKER_NAMES:
+            additional_examples.append(["Mali ye jamana ɲuman ye!", "Bakary"])
+        if "Ngolo" in SPEAKER_NAMES:
+            additional_examples.append(["An ka ɲɔgɔn dɛmɛ ka baara kɛ ɲɔgɔn fɛ", "Ngolo"])
+        if "Ibrahima" in SPEAKER_NAMES:
+            additional_examples.append(["Hakili to yɔrɔ min na, sabali bɛ yen", "Ibrahima"])
+        if "Amara" in SPEAKER_NAMES:
+            additional_examples.append(["Dɔnko ɲuman ye, a bɛ dɔn mɔgɔ kɔnɔ", "Amara"])
+        base_examples.extend(additional_examples)
+    return base_examples
 def build_interface():
     """Build the Gradio interface - simplified like your old working version"""
             gr.Markdown("**Click any example below:**")
+            # Use dynamic examples based on available speakers
+            current_examples = get_examples_for_available_speakers()
+            for i, (text, speaker) in enumerate(current_examples):
+                # Only show examples for speakers that are actually available
+                if speaker in SPEAKER_NAMES:
+                    btn = gr.Button(f"{text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
+                    btn.click(
+                        fn=lambda t=text, s=speaker: load_example(t, s),
+                        outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
+                    )
         with gr.Accordion("About", open=False):
             gr.Markdown(f"""
     """Main function to launch the Gradio interface"""
     logger.info("Starting Bambara TTS Gradio interface.")
+    # DO NOT preload - let it initialize on first request only (like your working version)
     interface = build_interface()
     interface.launch(
         server_name="0.0.0.0",