Spaces:

MALIBA-AI
/

BambaraText2Speech

Running on Zero

App Files Community

sudoping01 committed 8 days ago

Commit

17640d7

verified ·

1 Parent(s): dabbe5d

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -62

app.py CHANGED Viewed

@@ -24,38 +24,50 @@ hf_token = os.getenv("HF_TOKEN")
 if hf_token:
     login(token=hf_token)
-# Global variables for model caching
 _tts_model = None
 _speakers_dict = None
 _model_initialized = False
 _initialization_in_progress = False
 def get_speakers_dict():
-    """Get speakers dictionary using the correct SDK structure"""
     try:
-        # Import the Speakers class (not individual speakers)
         from maliba_ai.config.settings import Speakers
-        # Access all 10 speakers through the Speakers class
-        speakers_dict = {
-            "Adama": Speakers.Adama,
-            "Moussa": Speakers.Moussa,
-            "Bourama": Speakers.Bourama,
-            "Modibo": Speakers.Modibo,
-            "Seydou": Speakers.Seydou,
-            "Amadou": Speakers.Amadou,
-            "Bakary": Speakers.Bakary,
-            "Ngolo": Speakers.Ngolo,
-            "Amara": Speakers.Amara,
-            "Ibrahima": Speakers.Ibrahima
-        }
-        logger.info(f"🎤 Successfully loaded {len(speakers_dict)} speakers: {list(speakers_dict.keys())}")
-        return speakers_dict
     except Exception as e:
-        logger.error(f"❌ Failed to import Speakers class: {e}")
-        return {}
 @spaces.GPU()
 def initialize_model_once():
@@ -79,8 +91,8 @@ def initialize_model_once():
         logger.info("Initializing Bambara TTS model...")
         start_time = time.time()
-        # Use the correct import path
-        from maliba_ai.tts.inference import BambaraTTSInference
         model = BambaraTTSInference()
         speakers = get_speakers_dict()
@@ -209,7 +221,7 @@ def get_speaker_names():
 SPEAKER_NAMES = get_speaker_names()
-# Examples representing ALL 10 speakers - with fallbacks for missing speakers
 examples = [
     ["Aw ni ce", "Adama"],  # Natural conversational greeting
     ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"],  # Clear pronunciation for informative content
@@ -227,31 +239,6 @@ examples = [
     ["Dɔnko ɲuman ye, a bɛ dɔn mɔgɔ kɔnɔ", "Amara"],  # Melodic and smooth for poetic expression
 ]
-def get_safe_examples():
-    """Get examples with speaker fallbacks for missing speakers"""
-    safe_examples = []
-    # Fallback mapping for missing speakers
-    fallback_speakers = {
-        "Amadou": "Adama",    # Warm -> Natural conversational
-        "Bakary": "Modibo",   # Authoritative -> Expressive
-        "Ngolo": "Adama",     # Youthful -> Natural conversational
-        "Ibrahima": "Seydou", # Calm -> Balanced
-        "Amara": "Moussa"     # Melodic -> Clear pronunciation
-    }
-    for text, speaker in examples:
-        # Use original speaker if available, otherwise use fallback
-        if speaker in SPEAKER_NAMES:
-            safe_examples.append([text, speaker])
-        elif speaker in fallback_speakers and fallback_speakers[speaker] in SPEAKER_NAMES:
-            safe_examples.append([text, fallback_speakers[speaker]])
-        else:
-            # Final fallback to first available speaker
-            safe_examples.append([text, SPEAKER_NAMES[0]])
-    return safe_examples
 def build_interface():
     """Build the Gradio interface - simplified like your old working version"""
@@ -259,9 +246,9 @@ def build_interface():
         gr.Markdown("""
         # 🎤 Bambara Text-to-Speech
-        **Powered by MALIBA-AI** | *First Open-Source Bambara TTS*
-        Convert Bambara text to natural-sounding speech using our state-of-the-art neural TTS system.
         **Bambara** is spoken by millions of people in Mali and West Africa.
         """)
@@ -280,7 +267,7 @@ def build_interface():
                     choices=SPEAKER_NAMES,
                     value="Bourama" if "Bourama" in SPEAKER_NAMES else SPEAKER_NAMES[0],
                     label="🗣️ Speaker Voice",
-                    info=f"Choose from {len(SPEAKER_NAMES)} authentic Bambara voices"
                 )
                 generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
@@ -334,7 +321,7 @@ def build_interface():
             label="Generated Speech",
             type="numpy",
             interactive=False,
-            format="wav"
         )
         status_output = gr.Textbox(
@@ -350,10 +337,7 @@ def build_interface():
             gr.Markdown("**Click any example below:**")
-            # Use safe examples with fallbacks for missing speakers
-            safe_examples = get_safe_examples()
-            for i, (text, speaker) in enumerate(safe_examples):
                 btn = gr.Button(f"{text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
                 btn.click(
                     fn=lambda t=text, s=speaker: load_example(t, s),
@@ -365,7 +349,7 @@ def build_interface():
             ## About MALIBA-AI Bambara TTS
             - **🎯 Purpose**: First open-source Text-to-Speech system for Bambara language
-            - **🗣️ Speakers**: {len(SPEAKER_NAMES)} authentic Bambara voices
             - **🔊 Quality**: 16kHz neural speech synthesis
             - **⚡ Performance**: Model loads once and stays in memory
             - **📱 Usage**: Educational, accessibility, and cultural preservation
@@ -373,6 +357,18 @@ def build_interface():
             ### 🎭 Available Speakers:
             {', '.join(SPEAKER_NAMES)}
             **License**: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)
             ---
@@ -408,7 +404,8 @@ def build_interface():
 def main():
     """Main function to launch the Gradio interface"""
     logger.info("Starting Bambara TTS Gradio interface.")
     interface = build_interface()
     interface.launch(
         server_name="0.0.0.0",
@@ -419,6 +416,4 @@ def main():
     logger.info("Gradio interface launched successfully.")
 if __name__ == "__main__":
-    main()

 if hf_token:
     login(token=hf_token)
+# Global variables for model caching (like your old working version)
 _tts_model = None
 _speakers_dict = None
 _model_initialized = False
 _initialization_in_progress = False
 def get_speakers_dict():
+    """Get speakers dictionary using the new SDK structure"""
     try:
+        # Try the new structure first - check what's actually available
         from maliba_ai.config.settings import Speakers
+        # Get all available speaker attributes dynamically
+        available_speakers = {}
+        # Updated speaker list with all 10 speakers in preferred order
+        speaker_names = ["Bourama", "Adama", "Moussa", "Modibo", "Seydou",
+                        "Amadou", "Bakary", "Ngolo", "Ibrahima", "Amara"]
+        for name in speaker_names:
+            if hasattr(Speakers, name):
+                available_speakers[name] = getattr(Speakers, name)
+        if available_speakers:
+            logger.info(f"Loaded {len(available_speakers)} speakers from new structure: {list(available_speakers.keys())}")
+            return available_speakers
+        else:
+            raise AttributeError("No speakers found in new structure")
     except Exception as e:
+        logger.error(f"Failed to import from new settings structure: {e}")
+        # Fallback to old structure if new one fails
+        try:
+            from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
+            logger.info("Using fallback old speaker structure")
+            return {
+                "Adama": Adame,
+                "Moussa": Moussa,
+                "Bourama": Bourama,
+                "Modibo": Modibo,
+                "Seydou": Seydou
+            }
+        except Exception as e2:
+            logger.error(f"Failed to import speakers: {e2}")
+            return {}
 @spaces.GPU()
 def initialize_model_once():
         logger.info("Initializing Bambara TTS model...")
         start_time = time.time()
+        # Use the new import structure from the README
+        from maliba_ai.tts import BambaraTTSInference
         model = BambaraTTSInference()
         speakers = get_speakers_dict()
 SPEAKER_NAMES = get_speaker_names()
+# Examples with variety of lengths and speakers matched to their characteristics
 examples = [
     ["Aw ni ce", "Adama"],  # Natural conversational greeting
     ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"],  # Clear pronunciation for informative content
     ["Dɔnko ɲuman ye, a bɛ dɔn mɔgɔ kɔnɔ", "Amara"],  # Melodic and smooth for poetic expression
 ]
 def build_interface():
     """Build the Gradio interface - simplified like your old working version"""
         gr.Markdown("""
         # 🎤 Bambara Text-to-Speech
+        **Powered by MALIBA-AI**
+        Convert Bambara text to speech using our state-of-the-art TTS model.
         **Bambara** is spoken by millions of people in Mali and West Africa.
         """)
                     choices=SPEAKER_NAMES,
                     value="Bourama" if "Bourama" in SPEAKER_NAMES else SPEAKER_NAMES[0],
                     label="🗣️ Speaker Voice",
+                    info=f"Choose from {len(SPEAKER_NAMES)} authentic voices (Bourama recommended)"
                 )
                 generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
             label="Generated Speech",
             type="numpy",
             interactive=False,
+            format="wav"  # Specify WAV format to help with conversion
         )
         status_output = gr.Textbox(
             gr.Markdown("**Click any example below:**")
+            for i, (text, speaker) in enumerate(examples):
                 btn = gr.Button(f"{text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
                 btn.click(
                     fn=lambda t=text, s=speaker: load_example(t, s),
             ## About MALIBA-AI Bambara TTS
             - **🎯 Purpose**: First open-source Text-to-Speech system for Bambara language
+            - **🗣️ Speakers**: {len(SPEAKER_NAMES)} different authentic voices
             - **🔊 Quality**: 16kHz neural speech synthesis
             - **⚡ Performance**: Model loads once and stays in memory
             - **📱 Usage**: Educational, accessibility, and cultural preservation
             ### 🎭 Available Speakers:
             {', '.join(SPEAKER_NAMES)}
+            ### 🎯 Speaker Characteristics:
+            - **Bourama**: Most stable and accurate (recommended)
+            - **Adama**: Natural conversational tone
+            - **Moussa**: Clear pronunciation for educational content
+            - **Modibo**: Expressive delivery for storytelling
+            - **Seydou**: Balanced characteristics for general use
+            - **Amadou**: Warm and friendly voice
+            - **Bakary**: Deep, authoritative tone
+            - **Ngolo**: Youthful and energetic
+            - **Ibrahima**: Calm and measured delivery
+            - **Amara**: Melodic and smooth
             **License**: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)
             ---
 def main():
     """Main function to launch the Gradio interface"""
     logger.info("Starting Bambara TTS Gradio interface.")
+    # DO NOT preload - let it initialize on first request only (like your working version)
     interface = build_interface()
     interface.launch(
         server_name="0.0.0.0",
     logger.info("Gradio interface launched successfully.")
 if __name__ == "__main__":
+    main()