Update app.py

app.py CHANGED
@@ -6,9 +6,10 @@ os.environ["TORCH_COMPILE_DISABLE"] = "1"
 os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
-# Set CUDA environment to help with unsloth GPU detection
-os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Force GPU visibility
-os.environ["FORCE_CUDA"] = "1"  # Force CUDA usage
+# Set CUDA environment to help with unsloth GPU detection (only if not ZeroGPU)
+if not os.getenv("ZERO_GPU"):
+    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Force GPU visibility
+    os.environ["FORCE_CUDA"] = "1"  # Force CUDA usage
 
 import torch
 import gradio as gr
@@ -28,8 +29,11 @@ hf_token = os.getenv("HF_TOKEN")
 if hf_token:
     login(token=hf_token)
 
-# Check GPU availability
-if torch.cuda.is_available():
+# Check GPU availability (but don't initialize CUDA yet in ZeroGPU)
+if os.getenv("ZERO_GPU"):
+    device = "cuda"  # Assume CUDA in ZeroGPU
+    logger.info("ZeroGPU environment detected - CUDA will be available in decorated functions")
+elif torch.cuda.is_available():
     device = "cuda"
     logger.info("Using CUDA for inference.")
 elif torch.backends.mps.is_available():
@@ -72,13 +76,23 @@ def get_speakers_dict():
         return {}
 
 def initialize_tts_model():
-    """Initialize TTS model globally -
+    """Initialize TTS model globally - only if we're not in ZeroGPU environment"""
     try:
-
+        # Check if we're in ZeroGPU environment - don't initialize globally
+        if os.getenv("ZERO_GPU") or "zero" in str(os.getenv("SPACE_ID", "")).lower():
+            logger.info("ZeroGPU environment detected - skipping global initialization")
+            return None
+
+        # Only try global init if CUDA is actually available and initialized
+        if not torch.cuda.is_available():
+            logger.info("CUDA not available - skipping global initialization")
+            return None
+
+        logger.info("Attempting global TTS model initialization...")
         start_time = time.time()
 
         # Import and initialize the TTS model
-        from maliba_ai.tts
+        from maliba_ai.tts import BambaraTTSInference
 
         # Initialize model
         model = BambaraTTSInference()
@@ -89,16 +103,15 @@ def initialize_tts_model():
         return model
 
     except Exception as e:
-        logger.error(f"Failed to initialize TTS model: {e}")
-        logger.info("Model will be initialized on first request")
+        logger.error(f"Failed to initialize TTS model globally: {e}")
+        logger.info("Model will be initialized on first request with GPU decorator")
         return None
 
-# Initialize speakers dictionary
+# Initialize speakers dictionary (this doesn't require GPU)
 speakers_dict = get_speakers_dict()
 logger.info(f"Available speakers: {list(speakers_dict.keys())}")
 
-# Try to initialize model globally
-# If it fails due to GPU detection, it will be None and we'll init on first request
+# Try to initialize model globally only if not in ZeroGPU environment
 tts_model = initialize_tts_model()
 
 def validate_inputs(text, temperature, top_k, top_p, max_tokens):
@@ -188,18 +201,18 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
 # Get available speakers for dropdown
 SPEAKER_NAMES = list(speakers_dict.keys()) if speakers_dict else ["Adama", "Moussa", "Bourama", "Modibo", "Seydou"]
 
-
+# Examples with variety of lengths and speakers matched to content
 examples = [
-    ["Aw ni ce", "Adama"],
-    ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Bakary"],
-    ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Moussa"],
-    ["I ka kɛnɛ wa?", "Ngolo"],
-    ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Bourama"],
-    ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Ibrahima"],
-    ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Amara"],
-    ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Modibo"],
-    ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Amadou"],
-    ["Bamanankan ye kan ɲuman ye", "Seydou"],
+    ["Aw ni ce", "Adama"],  # Natural conversational greeting
+    ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Bakary"],  # Authoritative tone for serious topic
+    ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Moussa"],  # Clear pronunciation for education
+    ["I ka kɛnɛ wa?", "Ngolo"],  # Youthful energy for casual question
+    ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Bourama"],  # Most stable for long educational text
+    ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Ibrahima"],  # Calm and measured for formal text
+    ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Amara"],  # Melodic and smooth for heartfelt message
+    ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Modibo"],  # Expressive delivery for dramatic statement
+    ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Amadou"],  # Warm and friendly greeting
+    ["Bamanankan ye kan ɲuman ye", "Seydou"],  # Balanced characteristics for simple statement
 ]
 
 def build_interface():
@@ -224,7 +237,7 @@ def build_interface():
 
     **Bambara** is spoken by millions of people in Mali and West Africa 🌍
 
-    **Status**: {'✅ Model loaded' if tts_model is not None else '⏳ Model
+    **Status**: {'✅ Model pre-loaded' if tts_model is not None else '⏳ Model loads on first request (ZeroGPU optimized)'}
     """, elem_classes=["main-header"])
 
     with gr.Row():