sudoping01 committed on
Commit
9b51e2f
·
verified ·
1 Parent(s): 6b7c641

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -25
app.py CHANGED
@@ -6,9 +6,10 @@ os.environ["TORCH_COMPILE_DISABLE"] = "1"
6
  os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
 
9
- # Set CUDA environment to help with unsloth GPU detection
10
- os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Force GPU visibility
11
- os.environ["FORCE_CUDA"] = "1" # Force CUDA usage
 
12
 
13
  import torch
14
  import gradio as gr
@@ -28,8 +29,11 @@ hf_token = os.getenv("HF_TOKEN")
28
  if hf_token:
29
  login(token=hf_token)
30
 
31
- # Check GPU availability
32
- if torch.cuda.is_available():
 
 
 
33
  device = "cuda"
34
  logger.info("Using CUDA for inference.")
35
  elif torch.backends.mps.is_available():
@@ -72,13 +76,23 @@ def get_speakers_dict():
72
  return {}
73
 
74
  def initialize_tts_model():
75
- """Initialize TTS model globally - similar to ASR space pattern"""
76
  try:
77
- logger.info("Initializing Bambara TTS model globally...")
 
 
 
 
 
 
 
 
 
 
78
  start_time = time.time()
79
 
80
  # Import and initialize the TTS model
81
- from maliba_ai.tts.inference import BambaraTTSInference
82
 
83
  # Initialize model
84
  model = BambaraTTSInference()
@@ -89,16 +103,15 @@ def initialize_tts_model():
89
  return model
90
 
91
  except Exception as e:
92
- logger.error(f"Failed to initialize TTS model: {e}")
93
- logger.info("Model will be initialized on first request instead")
94
  return None
95
 
96
- # Initialize speakers dictionary
97
  speakers_dict = get_speakers_dict()
98
  logger.info(f"Available speakers: {list(speakers_dict.keys())}")
99
 
100
- # Try to initialize model globally (like ASR space)
101
- # If it fails due to GPU detection, it will be None and we'll init on first request
102
  tts_model = initialize_tts_model()
103
 
104
  def validate_inputs(text, temperature, top_k, top_p, max_tokens):
@@ -188,18 +201,18 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
188
  # Get available speakers for dropdown
189
  SPEAKER_NAMES = list(speakers_dict.keys()) if speakers_dict else ["Adama", "Moussa", "Bourama", "Modibo", "Seydou"]
190
 
191
-
192
  examples = [
193
- ["Aw ni ce", "Adama"],
194
- ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Bakary"],
195
- ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Moussa"],
196
- ["I ka kɛnɛ wa?", "Ngolo"],
197
- ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Bourama"],
198
- ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Ibrahima"],
199
- ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Amara"],
200
- ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Modibo"],
201
- ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Amadou"],
202
- ["Bamanankan ye kan ɲuman ye", "Seydou"],
203
  ]
204
 
205
  def build_interface():
@@ -224,7 +237,7 @@ def build_interface():
224
 
225
  **Bambara** is spoken by millions of people in Mali and West Africa 🌍
226
 
227
- **Status**: {'✅ Model loaded' if tts_model is not None else '⏳ Model will load on first request'}
228
  """, elem_classes=["main-header"])
229
 
230
  with gr.Row():
 
6
  os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
 
9
+ # Set CUDA environment to help with unsloth GPU detection (only if not ZeroGPU)
10
+ if not os.getenv("ZERO_GPU"):
11
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Force GPU visibility
12
+ os.environ["FORCE_CUDA"] = "1" # Force CUDA usage
13
 
14
  import torch
15
  import gradio as gr
 
29
  if hf_token:
30
  login(token=hf_token)
31
 
32
+ # Check GPU availability (but don't initialize CUDA yet in ZeroGPU)
33
+ if os.getenv("ZERO_GPU"):
34
+ device = "cuda" # Assume CUDA in ZeroGPU
35
+ logger.info("ZeroGPU environment detected - CUDA will be available in decorated functions")
36
+ elif torch.cuda.is_available():
37
  device = "cuda"
38
  logger.info("Using CUDA for inference.")
39
  elif torch.backends.mps.is_available():
 
76
  return {}
77
 
78
  def initialize_tts_model():
79
+ """Initialize TTS model globally - only if we're not in ZeroGPU environment"""
80
  try:
81
+ # Check if we're in ZeroGPU environment - don't initialize globally
82
+ if os.getenv("ZERO_GPU") or "zero" in str(os.getenv("SPACE_ID", "")).lower():
83
+ logger.info("ZeroGPU environment detected - skipping global initialization")
84
+ return None
85
+
86
+ # Only try global init if CUDA is actually available and initialized
87
+ if not torch.cuda.is_available():
88
+ logger.info("CUDA not available - skipping global initialization")
89
+ return None
90
+
91
+ logger.info("Attempting global TTS model initialization...")
92
  start_time = time.time()
93
 
94
  # Import and initialize the TTS model
95
+ from maliba_ai.tts import BambaraTTSInference
96
 
97
  # Initialize model
98
  model = BambaraTTSInference()
 
103
  return model
104
 
105
  except Exception as e:
106
+ logger.error(f"Failed to initialize TTS model globally: {e}")
107
+ logger.info("Model will be initialized on first request with GPU decorator")
108
  return None
109
 
110
+ # Initialize speakers dictionary (this doesn't require GPU)
111
  speakers_dict = get_speakers_dict()
112
  logger.info(f"Available speakers: {list(speakers_dict.keys())}")
113
 
114
+ # Try to initialize model globally only if not in ZeroGPU environment
 
115
  tts_model = initialize_tts_model()
116
 
117
  def validate_inputs(text, temperature, top_k, top_p, max_tokens):
 
201
  # Get available speakers for dropdown
202
  SPEAKER_NAMES = list(speakers_dict.keys()) if speakers_dict else ["Adama", "Moussa", "Bourama", "Modibo", "Seydou"]
203
 
204
+ # Examples with variety of lengths and speakers matched to content
205
  examples = [
206
+ ["Aw ni ce", "Adama"], # Natural conversational greeting
207
+ ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Bakary"], # Authoritative tone for serious topic
208
+ ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Moussa"], # Clear pronunciation for education
209
+ ["I ka kɛnɛ wa?", "Ngolo"], # Youthful energy for casual question
210
+ ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Bourama"], # Most stable for long educational text
211
+ ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Ibrahima"], # Calm and measured for formal text
212
+ ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Amara"], # Melodic and smooth for heartfelt message
213
+ ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Modibo"], # Expressive delivery for dramatic statement
214
+ ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Amadou"], # Warm and friendly greeting
215
+ ["Bamanankan ye kan ɲuman ye", "Seydou"], # Balanced characteristics for simple statement
216
  ]
217
 
218
  def build_interface():
 
237
 
238
  **Bambara** is spoken by millions of people in Mali and West Africa 🌍
239
 
240
+ **Status**: {'✅ Model pre-loaded' if tts_model is not None else '⏳ Model loads on first request (ZeroGPU optimized)'}
241
  """, elem_classes=["main-header"])
242
 
243
  with gr.Row():