sudoping01 committed on
Commit
17640d7
·
verified ·
1 Parent(s): dabbe5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -62
app.py CHANGED
@@ -24,38 +24,50 @@ hf_token = os.getenv("HF_TOKEN")
24
  if hf_token:
25
  login(token=hf_token)
26
 
27
- # Global variables for model caching
28
  _tts_model = None
29
  _speakers_dict = None
30
  _model_initialized = False
31
  _initialization_in_progress = False
32
 
33
  def get_speakers_dict():
34
- """Get speakers dictionary using the correct SDK structure"""
35
  try:
36
- # Import the Speakers class (not individual speakers)
37
  from maliba_ai.config.settings import Speakers
38
 
39
- # Access all 10 speakers through the Speakers class
40
- speakers_dict = {
41
- "Adama": Speakers.Adama,
42
- "Moussa": Speakers.Moussa,
43
- "Bourama": Speakers.Bourama,
44
- "Modibo": Speakers.Modibo,
45
- "Seydou": Speakers.Seydou,
46
- "Amadou": Speakers.Amadou,
47
- "Bakary": Speakers.Bakary,
48
- "Ngolo": Speakers.Ngolo,
49
- "Amara": Speakers.Amara,
50
- "Ibrahima": Speakers.Ibrahima
51
- }
52
-
53
- logger.info(f"🎤 Successfully loaded {len(speakers_dict)} speakers: {list(speakers_dict.keys())}")
54
- return speakers_dict
55
 
 
 
 
 
 
 
 
 
 
 
56
  except Exception as e:
57
- logger.error(f"Failed to import Speakers class: {e}")
58
- return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  @spaces.GPU()
61
  def initialize_model_once():
@@ -79,8 +91,8 @@ def initialize_model_once():
79
  logger.info("Initializing Bambara TTS model...")
80
  start_time = time.time()
81
 
82
- # Use the correct import path
83
- from maliba_ai.tts.inference import BambaraTTSInference
84
 
85
  model = BambaraTTSInference()
86
  speakers = get_speakers_dict()
@@ -209,7 +221,7 @@ def get_speaker_names():
209
 
210
  SPEAKER_NAMES = get_speaker_names()
211
 
212
- # Examples representing ALL 10 speakers - with fallbacks for missing speakers
213
  examples = [
214
  ["Aw ni ce", "Adama"], # Natural conversational greeting
215
  ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"], # Clear pronunciation for informative content
@@ -227,31 +239,6 @@ examples = [
227
  ["Dɔnko ɲuman ye, a bɛ dɔn mɔgɔ kɔnɔ", "Amara"], # Melodic and smooth for poetic expression
228
  ]
229
 
230
- def get_safe_examples():
231
- """Get examples with speaker fallbacks for missing speakers"""
232
- safe_examples = []
233
-
234
- # Fallback mapping for missing speakers
235
- fallback_speakers = {
236
- "Amadou": "Adama", # Warm -> Natural conversational
237
- "Bakary": "Modibo", # Authoritative -> Expressive
238
- "Ngolo": "Adama", # Youthful -> Natural conversational
239
- "Ibrahima": "Seydou", # Calm -> Balanced
240
- "Amara": "Moussa" # Melodic -> Clear pronunciation
241
- }
242
-
243
- for text, speaker in examples:
244
- # Use original speaker if available, otherwise use fallback
245
- if speaker in SPEAKER_NAMES:
246
- safe_examples.append([text, speaker])
247
- elif speaker in fallback_speakers and fallback_speakers[speaker] in SPEAKER_NAMES:
248
- safe_examples.append([text, fallback_speakers[speaker]])
249
- else:
250
- # Final fallback to first available speaker
251
- safe_examples.append([text, SPEAKER_NAMES[0]])
252
-
253
- return safe_examples
254
-
255
  def build_interface():
256
  """Build the Gradio interface - simplified like your old working version"""
257
 
@@ -259,9 +246,9 @@ def build_interface():
259
  gr.Markdown("""
260
  # 🎤 Bambara Text-to-Speech
261
 
262
- **Powered by MALIBA-AI** | *First Open-Source Bambara TTS*
263
 
264
- Convert Bambara text to natural-sounding speech using our state-of-the-art neural TTS system.
265
 
266
  **Bambara** is spoken by millions of people in Mali and West Africa.
267
  """)
@@ -280,7 +267,7 @@ def build_interface():
280
  choices=SPEAKER_NAMES,
281
  value="Bourama" if "Bourama" in SPEAKER_NAMES else SPEAKER_NAMES[0],
282
  label="🗣️ Speaker Voice",
283
- info=f"Choose from {len(SPEAKER_NAMES)} authentic Bambara voices"
284
  )
285
 
286
  generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
@@ -334,7 +321,7 @@ def build_interface():
334
  label="Generated Speech",
335
  type="numpy",
336
  interactive=False,
337
- format="wav"
338
  )
339
 
340
  status_output = gr.Textbox(
@@ -350,10 +337,7 @@ def build_interface():
350
 
351
  gr.Markdown("**Click any example below:**")
352
 
353
- # Use safe examples with fallbacks for missing speakers
354
- safe_examples = get_safe_examples()
355
-
356
- for i, (text, speaker) in enumerate(safe_examples):
357
  btn = gr.Button(f"{text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
358
  btn.click(
359
  fn=lambda t=text, s=speaker: load_example(t, s),
@@ -365,7 +349,7 @@ def build_interface():
365
  ## About MALIBA-AI Bambara TTS
366
 
367
  - **🎯 Purpose**: First open-source Text-to-Speech system for Bambara language
368
- - **🗣️ Speakers**: {len(SPEAKER_NAMES)} authentic Bambara voices
369
  - **🔊 Quality**: 16kHz neural speech synthesis
370
  - **⚡ Performance**: Model loads once and stays in memory
371
  - **📱 Usage**: Educational, accessibility, and cultural preservation
@@ -373,6 +357,18 @@ def build_interface():
373
  ### 🎭 Available Speakers:
374
  {', '.join(SPEAKER_NAMES)}
375
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  **License**: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)
377
 
378
  ---
@@ -408,7 +404,8 @@ def build_interface():
408
  def main():
409
  """Main function to launch the Gradio interface"""
410
  logger.info("Starting Bambara TTS Gradio interface.")
411
-
 
412
  interface = build_interface()
413
  interface.launch(
414
  server_name="0.0.0.0",
@@ -419,6 +416,4 @@ def main():
419
  logger.info("Gradio interface launched successfully.")
420
 
421
  if __name__ == "__main__":
422
- main()
423
-
424
-
 
24
  if hf_token:
25
  login(token=hf_token)
26
 
27
+ # Global variables for model caching (like your old working version)
28
  _tts_model = None
29
  _speakers_dict = None
30
  _model_initialized = False
31
  _initialization_in_progress = False
32
 
33
  def get_speakers_dict():
34
+ """Get speakers dictionary using the new SDK structure"""
35
  try:
36
+ # Try the new structure first - check what's actually available
37
  from maliba_ai.config.settings import Speakers
38
 
39
+ # Get all available speaker attributes dynamically
40
+ available_speakers = {}
41
+ # Updated speaker list with all 10 speakers in preferred order
42
+ speaker_names = ["Bourama", "Adama", "Moussa", "Modibo", "Seydou",
43
+ "Amadou", "Bakary", "Ngolo", "Ibrahima", "Amara"]
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ for name in speaker_names:
46
+ if hasattr(Speakers, name):
47
+ available_speakers[name] = getattr(Speakers, name)
48
+
49
+ if available_speakers:
50
+ logger.info(f"Loaded {len(available_speakers)} speakers from new structure: {list(available_speakers.keys())}")
51
+ return available_speakers
52
+ else:
53
+ raise AttributeError("No speakers found in new structure")
54
+
55
  except Exception as e:
56
+ logger.error(f"Failed to import from new settings structure: {e}")
57
+ # Fallback to old structure if new one fails
58
+ try:
59
+ from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
60
+ logger.info("Using fallback old speaker structure")
61
+ return {
62
+ "Adama": Adame,
63
+ "Moussa": Moussa,
64
+ "Bourama": Bourama,
65
+ "Modibo": Modibo,
66
+ "Seydou": Seydou
67
+ }
68
+ except Exception as e2:
69
+ logger.error(f"Failed to import speakers: {e2}")
70
+ return {}
71
 
72
  @spaces.GPU()
73
  def initialize_model_once():
 
91
  logger.info("Initializing Bambara TTS model...")
92
  start_time = time.time()
93
 
94
+ # Use the new import structure from the README
95
+ from maliba_ai.tts import BambaraTTSInference
96
 
97
  model = BambaraTTSInference()
98
  speakers = get_speakers_dict()
 
221
 
222
  SPEAKER_NAMES = get_speaker_names()
223
 
224
+ # Examples with variety of lengths and speakers matched to their characteristics
225
  examples = [
226
  ["Aw ni ce", "Adama"], # Natural conversational greeting
227
  ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"], # Clear pronunciation for informative content
 
239
  ["Dɔnko ɲuman ye, a bɛ dɔn mɔgɔ kɔnɔ", "Amara"], # Melodic and smooth for poetic expression
240
  ]
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  def build_interface():
243
  """Build the Gradio interface - simplified like your old working version"""
244
 
 
246
  gr.Markdown("""
247
  # 🎤 Bambara Text-to-Speech
248
 
249
+ **Powered by MALIBA-AI**
250
 
251
+ Convert Bambara text to speech using our state-of-the-art TTS model.
252
 
253
  **Bambara** is spoken by millions of people in Mali and West Africa.
254
  """)
 
267
  choices=SPEAKER_NAMES,
268
  value="Bourama" if "Bourama" in SPEAKER_NAMES else SPEAKER_NAMES[0],
269
  label="🗣️ Speaker Voice",
270
+ info=f"Choose from {len(SPEAKER_NAMES)} authentic voices (Bourama recommended)"
271
  )
272
 
273
  generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
 
321
  label="Generated Speech",
322
  type="numpy",
323
  interactive=False,
324
+ format="wav" # Specify WAV format to help with conversion
325
  )
326
 
327
  status_output = gr.Textbox(
 
337
 
338
  gr.Markdown("**Click any example below:**")
339
 
340
+ for i, (text, speaker) in enumerate(examples):
 
 
 
341
  btn = gr.Button(f"{text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
342
  btn.click(
343
  fn=lambda t=text, s=speaker: load_example(t, s),
 
349
  ## About MALIBA-AI Bambara TTS
350
 
351
  - **🎯 Purpose**: First open-source Text-to-Speech system for Bambara language
352
+ - **🗣️ Speakers**: {len(SPEAKER_NAMES)} different authentic voices
353
  - **🔊 Quality**: 16kHz neural speech synthesis
354
  - **⚡ Performance**: Model loads once and stays in memory
355
  - **📱 Usage**: Educational, accessibility, and cultural preservation
 
357
  ### 🎭 Available Speakers:
358
  {', '.join(SPEAKER_NAMES)}
359
 
360
+ ### 🎯 Speaker Characteristics:
361
+ - **Bourama**: Most stable and accurate (recommended)
362
+ - **Adama**: Natural conversational tone
363
+ - **Moussa**: Clear pronunciation for educational content
364
+ - **Modibo**: Expressive delivery for storytelling
365
+ - **Seydou**: Balanced characteristics for general use
366
+ - **Amadou**: Warm and friendly voice
367
+ - **Bakary**: Deep, authoritative tone
368
+ - **Ngolo**: Youthful and energetic
369
+ - **Ibrahima**: Calm and measured delivery
370
+ - **Amara**: Melodic and smooth
371
+
372
  **License**: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)
373
 
374
  ---
 
404
  def main():
405
  """Main function to launch the Gradio interface"""
406
  logger.info("Starting Bambara TTS Gradio interface.")
407
+
408
+ # DO NOT preload - let it initialize on first request only (like your working version)
409
  interface = build_interface()
410
  interface.launch(
411
  server_name="0.0.0.0",
 
416
  logger.info("Gradio interface launched successfully.")
417
 
418
  if __name__ == "__main__":
419
+ main()