Update app.py

app.py CHANGED
@@ -6,9 +6,10 @@ os.environ["TORCH_COMPILE_DISABLE"] = "1"
 os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
-# Set CUDA environment to help with unsloth GPU detection
-os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Force GPU visibility
-os.environ["FORCE_CUDA"] = "1"  # Force CUDA usage
+# Set CUDA environment to help with unsloth GPU detection (only if not ZeroGPU)
+if not os.getenv("ZERO_GPU"):
+    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Force GPU visibility
+    os.environ["FORCE_CUDA"] = "1"  # Force CUDA usage
 
 import torch
 import gradio as gr
@@ -28,8 +29,11 @@ hf_token = os.getenv("HF_TOKEN")
 if hf_token:
     login(token=hf_token)
 
-# Check GPU availability
-if torch.cuda.is_available():
+# Check GPU availability (but don't initialize CUDA yet in ZeroGPU)
+if os.getenv("ZERO_GPU"):
+    device = "cuda"  # Assume CUDA in ZeroGPU
+    logger.info("ZeroGPU environment detected - CUDA will be available in decorated functions")
+elif torch.cuda.is_available():
     device = "cuda"
     logger.info("Using CUDA for inference.")
 elif torch.backends.mps.is_available():
@@ -72,13 +76,23 @@ def get_speakers_dict():
         return {}
 
 def initialize_tts_model():
-    """Initialize TTS model globally -
+    """Initialize TTS model globally - only if we're not in ZeroGPU environment"""
     try:
-
+        # Check if we're in ZeroGPU environment - don't initialize globally
+        if os.getenv("ZERO_GPU") or "zero" in str(os.getenv("SPACE_ID", "")).lower():
+            logger.info("ZeroGPU environment detected - skipping global initialization")
+            return None
+
+        # Only try global init if CUDA is actually available and initialized
+        if not torch.cuda.is_available():
+            logger.info("CUDA not available - skipping global initialization")
+            return None
+
+        logger.info("Attempting global TTS model initialization...")
         start_time = time.time()
 
         # Import and initialize the TTS model
-        from maliba_ai.tts
+        from maliba_ai.tts import BambaraTTSInference
 
         # Initialize model
         model = BambaraTTSInference()
@@ -89,16 +103,15 @@ def initialize_tts_model():
         return model
 
     except Exception as e:
-        logger.error(f"Failed to initialize TTS model: {e}")
-        logger.info("Model will be initialized on first request")
+        logger.error(f"Failed to initialize TTS model globally: {e}")
+        logger.info("Model will be initialized on first request with GPU decorator")
         return None
 
-# Initialize speakers dictionary
+# Initialize speakers dictionary (this doesn't require GPU)
 speakers_dict = get_speakers_dict()
 logger.info(f"Available speakers: {list(speakers_dict.keys())}")
 
-# Try to initialize model globally
-# If it fails due to GPU detection, it will be None and we'll init on first request
+# Try to initialize model globally only if not in ZeroGPU environment
 tts_model = initialize_tts_model()
 
 def validate_inputs(text, temperature, top_k, top_p, max_tokens):
@@ -188,18 +201,18 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
 # Get available speakers for dropdown
 SPEAKER_NAMES = list(speakers_dict.keys()) if speakers_dict else ["Adama", "Moussa", "Bourama", "Modibo", "Seydou"]
 
-
+# Examples with variety of lengths and speakers matched to content
 examples = [
-    ["Aw ni ce", "Adama"],
-    ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Bakary"],
-    ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Moussa"],
-    ["I ka kɛnɛ wa?", "Ngolo"],
-    ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Bourama"],
-    ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Ibrahima"],
-    ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Amara"],
-    ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Modibo"],
-    ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Amadou"],
-    ["Bamanankan ye kan ɲuman ye", "Seydou"],
+    ["Aw ni ce", "Adama"],  # Natural conversational greeting
+    ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Bakary"],  # Authoritative tone for serious topic
+    ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Moussa"],  # Clear pronunciation for education
+    ["I ka kɛnɛ wa?", "Ngolo"],  # Youthful energy for casual question
+    ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Bourama"],  # Most stable for long educational text
+    ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Ibrahima"],  # Calm and measured for formal text
+    ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Amara"],  # Melodic and smooth for heartfelt message
+    ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Modibo"],  # Expressive delivery for dramatic statement
+    ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Amadou"],  # Warm and friendly greeting
+    ["Bamanankan ye kan ɲuman ye", "Seydou"],  # Balanced characteristics for simple statement
 ]
 
 def build_interface():
@@ -224,7 +237,7 @@ def build_interface():
 
     **Bambara** is spoken by millions of people in Mali and West Africa 🌍
 
-    **Status**: {'✅ Model loaded' if tts_model is not None else '⏳ Model
+    **Status**: {'✅ Model pre-loaded' if tts_model is not None else '⏳ Model loads on first request (ZeroGPU optimized)'}
     """, elem_classes=["main-header"])
 
     with gr.Row():