Spaces:

MALIBA-AI
/

BambaraText2Speech

Running on Zero

App Files Files Community

sudoping01 committed 29 days ago

Commit

92275ac

verified ·

1 Parent(s): faebdf2

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -38

app.py CHANGED Viewed

@@ -1,40 +1,45 @@
-import os
-import warnings
-# Set environment variables BEFORE any imports to prevent CUDA initialization
-os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Hide CUDA during startup
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # For debugging
-# Suppress warnings
-warnings.filterwarnings("ignore")
 import gradio as gr
 import numpy as np
 import spaces
 from huggingface_hub import login
-# These imports should now work without CUDA errors
-from maliba_ai.tts.inference import BambaraTTSInference
-from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
 hf_token = os.getenv("HF_TOKEN")
 if hf_token:
     login(token=hf_token)
-# Initialize TTS model (this will use CPU during startup)
-print("Loading Bambara TTS model...")
-tts = BambaraTTSInference()
-print("Model loaded successfully!")
-SPEAKERS = {
-    "Adame": Adame,
-    "Moussa": Moussa,
-    "Bourama": Bourama,
-    "Modibo": Modibo,
-    "Seydou": Seydou
-}
 def validate_inputs(text, temperature, top_k, top_p, max_tokens):
     """Validate user inputs"""
@@ -42,7 +47,7 @@ def validate_inputs(text, temperature, top_k, top_p, max_tokens):
         return False, "Please enter some Bambara text."
     if not (0.001 <= temperature <= 1):
-        return False, "Temperature must be between positive"
     if not (1 <= top_k <= 100):
         return False, "Top-K must be between 1 and 100"
@@ -59,14 +64,10 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
         return None, "Please enter some Bambara text."
     try:
-        # Re-enable CUDA for GPU context
-        import torch
-        if torch.cuda.is_available():
-            # Remove CUDA visibility restriction for GPU execution
-            if "CUDA_VISIBLE_DEVICES" in os.environ:
-                os.environ.pop("CUDA_VISIBLE_DEVICES", None)
-        speaker = SPEAKERS[speaker_name]
         if use_advanced:
             is_valid, error_msg = validate_inputs(text, temperature, top_k, top_p, max_tokens)
@@ -94,8 +95,14 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
         return (sample_rate, waveform), f"✅ Audio generated successfully"
     except Exception as e:
         return None, f"❌ Error: {str(e)}"
 examples = [
     ["Aw ni ce", "Adame"],
     ["I ni ce", "Moussa"],
@@ -117,7 +124,7 @@ with gr.Blocks(title="Bambara TTS - EXPERIMENTAL", theme=gr.themes.Soft()) as de
     **Bambara** is spoken by millions of people in Mali and West Africa.
-    ⚡ **Note**: Model loads on CPU during startup, then uses GPU for generation.
     """)
     with gr.Row():
@@ -132,7 +139,7 @@ with gr.Blocks(title="Bambara TTS - EXPERIMENTAL", theme=gr.themes.Soft()) as de
             )
             speaker_dropdown = gr.Dropdown(
-                choices=list(SPEAKERS.keys()),
                 value="Adame",
                 label="🗣️ Speaker Voice"
             )
@@ -216,8 +223,9 @@ with gr.Blocks(title="Bambara TTS - EXPERIMENTAL", theme=gr.themes.Soft()) as de
         gr.Markdown("""
         **⚠️ This is an experimental Bambara TTS model.**
-        The model loads on CPU during startup to avoid CUDA initialization errors,
-        then switches to GPU during speech generation for optimal performance.
         """)
     def toggle_advanced(use_adv):

 import gradio as gr
 import numpy as np
+import os
 import spaces
 from huggingface_hub import login
+# DO NOT import maliba_ai here - it will cause CUDA errors
+# from maliba_ai.tts.inference import BambaraTTSInference
+# from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
 hf_token = os.getenv("HF_TOKEN")
 if hf_token:
     login(token=hf_token)
+# Global variable to store the TTS instance
+tts_instance = None
+SPEAKERS = None
+def initialize_tts():
+    """Load the TTS model and speaker table on first use and cache them.
+
+    The maliba_ai imports are done inside this function rather than at
+    module level to avoid CUDA initialization during app startup. Meant to
+    be called only inside the GPU context.
+
+    Returns:
+        The (tts_instance, SPEAKERS) pair held in the module globals.
+    """
+    global tts_instance, SPEAKERS
+    # Only the first call loads anything; later calls reuse the cached globals.
+    # NOTE(review): if the GPU context runs in a separate worker process, this
+    # cache may not persist between calls - confirm against the runtime.
+    if tts_instance is None:
+        print("Loading Bambara TTS model...")
+        # Import here to avoid CUDA initialization during app startup
+        from maliba_ai.tts.inference import BambaraTTSInference
+        from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
+        tts_instance = BambaraTTSInference()
+        # Map the speaker names used by the UI to the imported speaker objects.
+        SPEAKERS = {
+            "Adame": Adame,
+            "Moussa": Moussa,
+            "Bourama": Bourama,
+            "Modibo": Modibo,
+            "Seydou": Seydou
+        }
+        print("Model loaded successfully!")
+    return tts_instance, SPEAKERS
 def validate_inputs(text, temperature, top_k, top_p, max_tokens):
     """Validate user inputs"""
         return False, "Please enter some Bambara text."
     if not (0.001 <= temperature <= 1):
+        return False, "Temperature must be between 0.001 and 1"
     if not (1 <= top_k <= 100):
         return False, "Top-K must be between 1 and 100"
         return None, "Please enter some Bambara text."
     try:
+        # Initialize TTS inside GPU context
+        tts, speakers = initialize_tts()
+        speaker = speakers[speaker_name]
         if use_advanced:
             is_valid, error_msg = validate_inputs(text, temperature, top_k, top_p, max_tokens)
         return (sample_rate, waveform), f"✅ Audio generated successfully"
     except Exception as e:
+        import traceback
+        error_msg = f"❌ Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
+        print(error_msg)  # Log to console for debugging
         return None, f"❌ Error: {str(e)}"
+# Define speaker names for UI (without importing the actual speaker objects)
+SPEAKER_NAMES = ["Adame", "Moussa", "Bourama", "Modibo", "Seydou"]
 examples = [
     ["Aw ni ce", "Adame"],
     ["I ni ce", "Moussa"],
     **Bambara** is spoken by millions of people in Mali and West Africa.
+    ⚡ **Note**: The model will load when you first generate speech (may take a moment).
     """)
     with gr.Row():
             )
             speaker_dropdown = gr.Dropdown(
+                choices=SPEAKER_NAMES,
                 value="Adame",
                 label="🗣️ Speaker Voice"
             )
         gr.Markdown("""
         **⚠️ This is an experimental Bambara TTS model.**
+        - The model loads automatically when you first generate speech
+        - First generation may take longer due to model initialization
+        - GPU acceleration is used for optimal performance
         """)
     def toggle_advanced(use_adv):