sudoping01 committed on
Commit
50f13c9
·
verified ·
1 Parent(s): b03e687

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -56
app.py CHANGED
@@ -1,10 +1,15 @@
1
  import os
2
 
 
3
  os.environ["TORCHDYNAMO_DISABLE"] = "1"
4
  os.environ["TORCH_COMPILE_DISABLE"] = "1"
5
  os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
6
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
7
 
 
 
 
 
8
  import torch
9
  import gradio as gr
10
  import numpy as np
@@ -24,50 +29,38 @@ hf_token = os.getenv("HF_TOKEN")
24
  if hf_token:
25
  login(token=hf_token)
26
 
27
- # Global variables for model caching (like your old working version)
28
  _tts_model = None
29
  _speakers_dict = None
30
  _model_initialized = False
31
  _initialization_in_progress = False
32
 
33
  def get_speakers_dict():
34
- """Get speakers dictionary using the new SDK structure"""
35
  try:
36
- # Try the new structure first - check what's actually available
37
  from maliba_ai.config.settings import Speakers
38
 
39
- # Get all available speaker attributes dynamically
40
- available_speakers = {}
41
- # Updated speaker list with all 10 speakers in preferred order
42
- speaker_names = ["Bourama", "Adama", "Moussa", "Modibo", "Seydou",
43
- "Amadou", "Bakary", "Ngolo", "Ibrahima", "Amara"]
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- for name in speaker_names:
46
- if hasattr(Speakers, name):
47
- available_speakers[name] = getattr(Speakers, name)
48
-
49
- if available_speakers:
50
- logger.info(f"Loaded {len(available_speakers)} speakers from new structure: {list(available_speakers.keys())}")
51
- return available_speakers
52
- else:
53
- raise AttributeError("No speakers found in new structure")
54
-
55
  except Exception as e:
56
- logger.error(f"Failed to import from new settings structure: {e}")
57
- # Fallback to old structure if new one fails
58
- try:
59
- from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
60
- logger.info("Using fallback old speaker structure")
61
- return {
62
- "Adama": Adame,
63
- "Moussa": Moussa,
64
- "Bourama": Bourama,
65
- "Modibo": Modibo,
66
- "Seydou": Seydou
67
- }
68
- except Exception as e2:
69
- logger.error(f"Failed to import speakers: {e2}")
70
- return {}
71
 
72
  @spaces.GPU()
73
  def initialize_model_once():
@@ -91,8 +84,8 @@ def initialize_model_once():
91
  logger.info("Initializing Bambara TTS model...")
92
  start_time = time.time()
93
 
94
- # Use the new import structure from the README
95
- from maliba_ai.tts import BambaraTTSInference
96
 
97
  model = BambaraTTSInference()
98
  speakers = get_speakers_dict()
@@ -221,7 +214,7 @@ def get_speaker_names():
221
 
222
  SPEAKER_NAMES = get_speaker_names()
223
 
224
- # Examples with variety of lengths and speakers matched to their characteristics
225
  examples = [
226
  ["Aw ni ce", "Adama"], # Natural conversational greeting
227
  ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"], # Clear pronunciation for informative content
@@ -239,6 +232,31 @@ examples = [
239
  ["Dɔnko ɲuman ye, a bɛ dɔn mɔgɔ kɔnɔ", "Amara"], # Melodic and smooth for poetic expression
240
  ]
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  def build_interface():
243
  """Build the Gradio interface - simplified like your old working version"""
244
 
@@ -246,9 +264,9 @@ def build_interface():
246
  gr.Markdown("""
247
  # 🎤 Bambara Text-to-Speech
248
 
249
- **Powered by MALIBA-AI**
250
 
251
- Convert Bambara text to speech using our state-of-the-art TTS model.
252
 
253
  **Bambara** is spoken by millions of people in Mali and West Africa.
254
  """)
@@ -267,7 +285,7 @@ def build_interface():
267
  choices=SPEAKER_NAMES,
268
  value="Bourama" if "Bourama" in SPEAKER_NAMES else SPEAKER_NAMES[0],
269
  label="🗣️ Speaker Voice",
270
- info=f"Choose from {len(SPEAKER_NAMES)} authentic voices (Bourama recommended)"
271
  )
272
 
273
  generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
@@ -321,7 +339,7 @@ def build_interface():
321
  label="Generated Speech",
322
  type="numpy",
323
  interactive=False,
324
- format="wav" # Specify WAV format to help with conversion
325
  )
326
 
327
  status_output = gr.Textbox(
@@ -337,7 +355,10 @@ def build_interface():
337
 
338
  gr.Markdown("**Click any example below:**")
339
 
340
- for i, (text, speaker) in enumerate(examples):
 
 
 
341
  btn = gr.Button(f"{text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
342
  btn.click(
343
  fn=lambda t=text, s=speaker: load_example(t, s),
@@ -349,7 +370,7 @@ def build_interface():
349
  ## About MALIBA-AI Bambara TTS
350
 
351
  - **🎯 Purpose**: First open-source Text-to-Speech system for Bambara language
352
- - **🗣️ Speakers**: {len(SPEAKER_NAMES)} different authentic voices
353
  - **🔊 Quality**: 16kHz neural speech synthesis
354
  - **⚡ Performance**: Model loads once and stays in memory
355
  - **📱 Usage**: Educational, accessibility, and cultural preservation
@@ -357,18 +378,6 @@ def build_interface():
357
  ### 🎭 Available Speakers:
358
  {', '.join(SPEAKER_NAMES)}
359
 
360
- ### 🎯 Speaker Characteristics:
361
- - **Bourama**: Most stable and accurate (recommended)
362
- - **Adama**: Natural conversational tone
363
- - **Moussa**: Clear pronunciation for educational content
364
- - **Modibo**: Expressive delivery for storytelling
365
- - **Seydou**: Balanced characteristics for general use
366
- - **Amadou**: Warm and friendly voice
367
- - **Bakary**: Deep, authoritative tone
368
- - **Ngolo**: Youthful and energetic
369
- - **Ibrahima**: Calm and measured delivery
370
- - **Amara**: Melodic and smooth
371
-
372
  **License**: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)
373
 
374
  ---
@@ -404,8 +413,7 @@ def build_interface():
404
  def main():
405
  """Main function to launch the Gradio interface"""
406
  logger.info("Starting Bambara TTS Gradio interface.")
407
-
408
- # DO NOT preload - let it initialize on first request only (like your working version)
409
  interface = build_interface()
410
  interface.launch(
411
  server_name="0.0.0.0",
 
1
  import os
2
 
3
+ # Disable problematic optimizations for ZeroGPU compatibility
4
  os.environ["TORCHDYNAMO_DISABLE"] = "1"
5
  os.environ["TORCH_COMPILE_DISABLE"] = "1"
6
  os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
 
9
+ # Disable Unsloth optimizations that cause issues in ZeroGPU
10
+ os.environ["UNSLOTH_DISABLE"] = "1"
11
+ os.environ["DISABLE_UNSLOTH"] = "1"
12
+
13
  import torch
14
  import gradio as gr
15
  import numpy as np
 
29
  if hf_token:
30
  login(token=hf_token)
31
 
32
+ # Global variables for model caching
33
  _tts_model = None
34
  _speakers_dict = None
35
  _model_initialized = False
36
  _initialization_in_progress = False
37
 
38
  def get_speakers_dict():
39
+ """Get speakers dictionary using the correct SDK structure"""
40
  try:
41
+ # Import the Speakers class (not individual speakers)
42
  from maliba_ai.config.settings import Speakers
43
 
44
+ # Access all 10 speakers through the Speakers class
45
+ speakers_dict = {
46
+ "Adama": Speakers.Adama,
47
+ "Moussa": Speakers.Moussa,
48
+ "Bourama": Speakers.Bourama,
49
+ "Modibo": Speakers.Modibo,
50
+ "Seydou": Speakers.Seydou,
51
+ "Amadou": Speakers.Amadou,
52
+ "Bakary": Speakers.Bakary,
53
+ "Ngolo": Speakers.Ngolo,
54
+ "Amara": Speakers.Amara,
55
+ "Ibrahima": Speakers.Ibrahima
56
+ }
57
+
58
+ logger.info(f"🎤 Successfully loaded {len(speakers_dict)} speakers: {list(speakers_dict.keys())}")
59
+ return speakers_dict
60
 
 
 
 
 
 
 
 
 
 
 
61
  except Exception as e:
62
+ logger.error(f"Failed to import Speakers class: {e}")
63
+ return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  @spaces.GPU()
66
  def initialize_model_once():
 
84
  logger.info("Initializing Bambara TTS model...")
85
  start_time = time.time()
86
 
87
+ # Use the correct import path
88
+ from maliba_ai.tts.inference import BambaraTTSInference
89
 
90
  model = BambaraTTSInference()
91
  speakers = get_speakers_dict()
 
214
 
215
  SPEAKER_NAMES = get_speaker_names()
216
 
217
+ # Examples representing ALL 10 speakers - with fallbacks for missing speakers
218
  examples = [
219
  ["Aw ni ce", "Adama"], # Natural conversational greeting
220
  ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"], # Clear pronunciation for informative content
 
232
  ["Dɔnko ɲuman ye, a bɛ dɔn mɔgɔ kɔnɔ", "Amara"], # Melodic and smooth for poetic expression
233
  ]
234
 
235
def get_safe_examples(example_list=None, available_speakers=None):
    """Return the examples with every speaker replaced by an available one.

    For each ``[text, speaker]`` pair the speaker is kept when it is
    available; otherwise its mapped substitute is used when that one is
    available; otherwise the first available speaker is used.

    Args:
        example_list: Iterable of ``(text, speaker)`` pairs. Defaults to the
            module-level ``examples`` list.
        available_speakers: Iterable of usable speaker names. Defaults to the
            module-level ``SPEAKER_NAMES``.

    Returns:
        list: ``[text, speaker]`` lists in the input order. Empty when no
        speaker is available at all, since there is nothing valid to fall
        back to (the previous code raised ``IndexError`` in that case).
    """
    if example_list is None:
        example_list = examples
    if available_speakers is None:
        available_speakers = SPEAKER_NAMES

    names = list(available_speakers)
    if not names:
        # No voice can be selected, so no example can be made safe.
        return []

    # Fallback mapping for missing speakers
    fallback_speakers = {
        "Amadou": "Adama",     # Warm -> Natural conversational
        "Bakary": "Modibo",    # Authoritative -> Expressive
        "Ngolo": "Adama",      # Youthful -> Natural conversational
        "Ibrahima": "Seydou",  # Calm -> Balanced
        "Amara": "Moussa",     # Melodic -> Clear pronunciation
    }

    safe_examples = []
    for text, speaker in example_list:
        if speaker not in names:
            substitute = fallback_speakers.get(speaker)
            # Final fallback is the first available speaker.
            speaker = substitute if substitute in names else names[0]
        safe_examples.append([text, speaker])

    return safe_examples
259
+
260
  def build_interface():
261
  """Build the Gradio interface - simplified like your old working version"""
262
 
 
264
  gr.Markdown("""
265
  # 🎤 Bambara Text-to-Speech
266
 
267
+ **Powered by MALIBA-AI** | *First Open-Source Bambara TTS*
268
 
269
+ Convert Bambara text to natural-sounding speech using our state-of-the-art neural TTS system.
270
 
271
  **Bambara** is spoken by millions of people in Mali and West Africa.
272
  """)
 
285
  choices=SPEAKER_NAMES,
286
  value="Bourama" if "Bourama" in SPEAKER_NAMES else SPEAKER_NAMES[0],
287
  label="🗣️ Speaker Voice",
288
+ info=f"Choose from {len(SPEAKER_NAMES)} authentic Bambara voices"
289
  )
290
 
291
  generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
 
339
  label="Generated Speech",
340
  type="numpy",
341
  interactive=False,
342
+ format="wav"
343
  )
344
 
345
  status_output = gr.Textbox(
 
355
 
356
  gr.Markdown("**Click any example below:**")
357
 
358
+ # Use safe examples with fallbacks for missing speakers
359
+ safe_examples = get_safe_examples()
360
+
361
+ for i, (text, speaker) in enumerate(safe_examples):
362
  btn = gr.Button(f"{text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
363
  btn.click(
364
  fn=lambda t=text, s=speaker: load_example(t, s),
 
370
  ## About MALIBA-AI Bambara TTS
371
 
372
  - **🎯 Purpose**: First open-source Text-to-Speech system for Bambara language
373
+ - **🗣️ Speakers**: {len(SPEAKER_NAMES)} authentic Bambara voices
374
  - **🔊 Quality**: 16kHz neural speech synthesis
375
  - **⚡ Performance**: Model loads once and stays in memory
376
  - **📱 Usage**: Educational, accessibility, and cultural preservation
 
378
  ### 🎭 Available Speakers:
379
  {', '.join(SPEAKER_NAMES)}
380
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  **License**: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)
382
 
383
  ---
 
413
  def main():
414
  """Main function to launch the Gradio interface"""
415
  logger.info("Starting Bambara TTS Gradio interface.")
416
+
 
417
  interface = build_interface()
418
  interface.launch(
419
  server_name="0.0.0.0",