sudoping01 committed on
Commit
c5bcdee
·
verified ·
1 Parent(s): 8159a55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -196
app.py CHANGED
@@ -1,22 +1,17 @@
1
  import os
2
 
3
- # Set environment variables BEFORE any imports
4
  os.environ["TORCHDYNAMO_DISABLE"] = "1"
5
  os.environ["TORCH_COMPILE_DISABLE"] = "1"
6
  os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
 
9
- # Set CUDA environment to help with unsloth GPU detection (only if not ZeroGPU)
10
- if not os.getenv("ZERO_GPU"):
11
- os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Force GPU visibility
12
- os.environ["FORCE_CUDA"] = "1" # Force CUDA usage
13
-
14
  import torch
15
  import gradio as gr
16
  import numpy as np
17
  import spaces
18
  import logging
19
  from huggingface_hub import login
 
20
  import time
21
 
22
  torch._dynamo.config.disable = True
@@ -29,23 +24,16 @@ hf_token = os.getenv("HF_TOKEN")
29
  if hf_token:
30
  login(token=hf_token)
31
 
32
- # Check GPU availability (but don't initialize CUDA yet in ZeroGPU)
33
- if os.getenv("ZERO_GPU"):
34
- device = "cuda" # Assume CUDA in ZeroGPU
35
- logger.info("ZeroGPU environment detected - CUDA will be available in decorated functions")
36
- elif torch.cuda.is_available():
37
- device = "cuda"
38
- logger.info("Using CUDA for inference.")
39
- elif torch.backends.mps.is_available():
40
- device = "mps"
41
- logger.info("Using MPS for inference.")
42
- else:
43
- device = "cpu"
44
- logger.info("Using CPU for inference.")
45
 
46
  def get_speakers_dict():
47
- """Get speakers dictionary using the new package structure"""
48
  try:
 
49
  from maliba_ai.config.settings import Speakers
50
  return {
51
  "Adama": Speakers.Adama,
@@ -60,62 +48,70 @@ def get_speakers_dict():
60
  "Amara": Speakers.Amara
61
  }
62
  except Exception as e:
63
- logger.error(f"Failed to import all speakers: {e}")
64
- # Fallback to core speakers only
65
  try:
66
- from maliba_ai.config.settings import Speakers
67
  return {
68
- "Adama": Speakers.Adama,
69
- "Moussa": Speakers.Moussa,
70
- "Bourama": Speakers.Bourama,
71
- "Modibo": Speakers.Modibo,
72
- "Seydou": Speakers.Seydou
73
  }
74
- except:
75
- logger.error("Failed to import even core speakers")
76
  return {}
77
 
78
def initialize_tts_model():
    """Try to build the TTS model eagerly, at module import time.

    Returns:
        The ``BambaraTTSInference`` instance on success, or ``None`` when
        eager loading is skipped (``ZERO_GPU`` is set, ``SPACE_ID`` contains
        "zero", or ``torch.cuda.is_available()`` is false) or when anything
        raises during construction.
    """
    try:
        space_id = str(os.getenv("SPACE_ID", "")).lower()

        # Work out whether eager loading has to be skipped, and why.
        if os.getenv("ZERO_GPU") or "zero" in space_id:
            skip_message = "ZeroGPU environment detected - skipping global initialization"
        elif not torch.cuda.is_available():
            skip_message = "CUDA not available - skipping global initialization"
        else:
            skip_message = None

        if skip_message is not None:
            logger.info(skip_message)
            return None

        logger.info("Attempting global TTS model initialization...")
        started_at = time.time()

        # Imported lazily so the package is only loaded when it is needed.
        from maliba_ai.tts.inference import BambaraTTSInference

        tts = BambaraTTSInference()

        duration = time.time() - started_at
        logger.info(f"TTS Model initialized successfully in {duration:.2f} seconds!")
        return tts

    except Exception as e:
        # Best effort: a failed eager load is reported and signalled with None.
        logger.error(f"Failed to initialize TTS model globally: {e}")
        logger.info("Model will be initialized on first request with GPU decorator")
        return None
109
-
110
- # Initialize speakers dictionary (this doesn't require GPU)
111
- speakers_dict = get_speakers_dict()
112
- logger.info(f"Available speakers: {list(speakers_dict.keys())}")
113
-
114
- # Try to initialize model globally only if not in ZeroGPU environment
115
- tts_model = initialize_tts_model()
116
 
117
  def validate_inputs(text, temperature, top_k, top_p, max_tokens):
118
- """Validate user inputs"""
119
  if not text or not text.strip():
120
  return False, "Please enter some Bambara text."
121
 
@@ -128,44 +124,33 @@ def validate_inputs(text, temperature, top_k, top_p, max_tokens):
128
  if not (0.1 <= top_p <= 1.0):
129
  return False, "Top-P must be between 0.1 and 1.0"
130
 
131
- if len(text.strip()) > 1000:
132
- return False, "Text is too long. Please use shorter text (max 1000 characters)."
133
-
134
  return True, ""
135
 
136
  @spaces.GPU()
137
  def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p, max_tokens):
138
- """Generate speech - with fallback initialization if global init failed"""
139
- global tts_model
140
-
141
  if not text.strip():
142
  return None, "Please enter some Bambara text."
143
 
144
  try:
145
- # If global initialization failed, try to initialize here with GPU decorator
146
- if tts_model is None:
147
- logger.info("Global model initialization failed, initializing with GPU decorator...")
148
- from maliba_ai.tts import BambaraTTSInference
149
- tts_model = BambaraTTSInference()
150
- logger.info("Model initialized successfully with GPU decorator!")
151
 
152
- if not speakers_dict:
153
- return None, "❌ Speakers not properly loaded"
154
 
155
- if speaker_name not in speakers_dict:
156
- available_speakers = list(speakers_dict.keys())
157
  return None, f"❌ Speaker '{speaker_name}' not found. Available: {available_speakers}"
158
 
159
- speaker = speakers_dict[speaker_name]
160
- logger.info(f"Generating speech with speaker: {speaker_name}")
161
 
162
- # Validate inputs if using advanced settings
163
  if use_advanced:
164
  is_valid, error_msg = validate_inputs(text, temperature, top_k, top_p, max_tokens)
165
  if not is_valid:
166
  return None, f"❌ {error_msg}"
167
 
168
- waveform = tts_model.generate_speech(
169
  text=text.strip(),
170
  speaker_id=speaker,
171
  temperature=temperature,
@@ -174,104 +159,85 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
174
  max_new_audio_tokens=int(max_tokens)
175
  )
176
  else:
177
- # Use default settings
178
- waveform = tts_model.generate_speech(
179
  text=text.strip(),
180
  speaker_id=speaker
181
  )
182
 
183
  if waveform is None or waveform.size == 0:
184
- return None, "❌ Failed to generate audio. Please try again with different text."
185
-
186
- # Ensure waveform is in correct format
187
- if isinstance(waveform, torch.Tensor):
188
- waveform = waveform.cpu().numpy()
189
-
190
- # Normalize audio to prevent clipping
191
- if np.max(np.abs(waveform)) > 0:
192
- waveform = waveform / np.max(np.abs(waveform)) * 0.9
193
 
194
  sample_rate = 16000
195
  return (sample_rate, waveform), f"βœ… Audio generated successfully for speaker {speaker_name}"
196
 
197
  except Exception as e:
198
- logger.error(f"Speech generation failed: {e}", exc_info=True)
199
  return None, f"❌ Error: {str(e)}"
200
 
201
- # Get available speakers for dropdown
202
- SPEAKER_NAMES = list(speakers_dict.keys()) if speakers_dict else ["Adama", "Moussa", "Bourama", "Modibo", "Seydou"]
 
 
 
 
 
 
203
 
204
# Example prompts of varying length, each paired with the speaker voice that
# suits its content. The Bambara text uses the letters ɛ, ɔ and ɲ; they must be
# stored as real UTF-8 characters so the model receives the intended words.
examples = [
    ["Aw ni ce", "Adama"],  # Natural conversational greeting
    ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Bakary"],  # Authoritative tone for serious topic
    ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Moussa"],  # Clear pronunciation for education
    ["I ka kɛnɛ wa?", "Ngolo"],  # Youthful energy for casual question
    ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Bourama"],  # Most stable for long educational text
    ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Ibrahima"],  # Calm and measured for formal text
    ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Amara"],  # Melodic and smooth for heartfelt message
    ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Modibo"],  # Expressive delivery for dramatic statement
    ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Amadou"],  # Warm and friendly greeting
    ["Bamanankan ye kan ɲuman ye", "Seydou"],  # Balanced characteristics for simple statement
]
217
 
218
  def build_interface():
219
- """Build the Gradio interface for Bambara TTS"""
220
 
221
- with gr.Blocks(
222
- title="Bambara TTS - MALIBA-AI",
223
- theme=gr.themes.Soft(),
224
- css="""
225
- .main-header { text-align: center; margin-bottom: 2rem; }
226
- .status-box { margin-top: 1rem; }
227
- """
228
- ) as demo:
229
 
230
- with gr.Row():
231
- gr.Markdown(f"""
232
- # 🎀 Bambara Text-to-Speech
233
-
234
- **Powered by MALIBA-AI** | *First Open-Source Bambara TTS*
235
-
236
- Convert Bambara text to natural-sounding speech using our state-of-the-art neural TTS system.
237
-
238
- **Bambara** is spoken by millions of people in Mali and West Africa 🌍
239
-
240
- **Status**: {'βœ… Model pre-loaded' if tts_model is not None else '⏳ Model loads on first request (ZeroGPU optimized)'}
241
- """, elem_classes=["main-header"])
242
 
243
  with gr.Row():
244
  with gr.Column(scale=2):
245
  text_input = gr.Textbox(
246
  label="πŸ“ Bambara Text",
247
- placeholder="I ni ce... (Type your Bambara text here)",
248
- lines=4,
249
- max_lines=8,
250
  value="I ni ce"
251
  )
252
 
253
  speaker_dropdown = gr.Dropdown(
254
  choices=SPEAKER_NAMES,
255
- value=SPEAKER_NAMES[0] if SPEAKER_NAMES else "Bourama", # Default to most stable speaker
256
  label="πŸ—£οΈ Speaker Voice",
257
- info=f"Choose from {len(SPEAKER_NAMES)} authentic voices (Bourama recommended for best quality)"
258
  )
259
 
260
- generate_btn = gr.Button(
261
- "🎡 Generate Speech",
262
- variant="primary",
263
- size="lg"
264
- )
265
 
266
  with gr.Column(scale=1):
267
  use_advanced = gr.Checkbox(
268
- label="βš™οΈ Advanced Settings",
269
  value=False,
270
- info="Customize generation parameters"
271
  )
272
 
273
  with gr.Group(visible=False) as advanced_group:
274
- gr.Markdown("**πŸ”§ Advanced Parameters:**")
275
 
276
  temperature = gr.Slider(
277
  minimum=0.1,
@@ -279,7 +245,7 @@ def build_interface():
279
  value=0.8,
280
  step=0.1,
281
  label="Temperature",
282
- info="Higher = more varied speech"
283
  )
284
 
285
  top_k = gr.Slider(
@@ -287,8 +253,7 @@ def build_interface():
287
  maximum=100,
288
  value=50,
289
  step=5,
290
- label="Top-K",
291
- info="Vocabulary selection size"
292
  )
293
 
294
  top_p = gr.Slider(
@@ -296,8 +261,7 @@ def build_interface():
296
  maximum=1.0,
297
  value=0.9,
298
  step=0.05,
299
- label="Top-P",
300
- info="Nucleus sampling threshold"
301
  )
302
 
303
  max_tokens = gr.Slider(
@@ -305,8 +269,7 @@ def build_interface():
305
  maximum=4096,
306
  value=2048,
307
  step=256,
308
- label="Max Audio Length",
309
- info="Maximum audio duration"
310
  )
311
 
312
  gr.Markdown("### πŸ”Š Generated Audio")
@@ -314,70 +277,41 @@ def build_interface():
314
  audio_output = gr.Audio(
315
  label="Generated Speech",
316
  type="numpy",
317
- interactive=False,
318
- show_download_button=True
319
  )
320
 
321
  status_output = gr.Textbox(
322
  label="Status",
323
  interactive=False,
324
  show_label=False,
325
- container=False,
326
- elem_classes=["status-box"]
327
  )
328
 
329
- with gr.Accordion("πŸ“š Try These Examples", open=True):
330
  def load_example(text, speaker):
331
  return text, speaker, False, 0.8, 50, 0.9, 2048
332
 
333
- gr.Markdown("**Click any example below to try it:**")
334
 
335
- with gr.Row():
336
- for i, (text, speaker) in enumerate(examples[:5]):
337
- btn = gr.Button(
338
- f"πŸ”Ή {text[:25]}{'...' if len(text) > 25 else ''}",
339
- size="sm"
340
- )
341
- btn.click(
342
- fn=lambda t=text, s=speaker: load_example(t, s),
343
- outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
344
- )
345
-
346
- with gr.Row():
347
- for i, (text, speaker) in enumerate(examples[5:]):
348
- btn = gr.Button(
349
- f"πŸ”Ή {text[:25]}{'...' if len(text) > 25 else ''}",
350
- size="sm"
351
- )
352
- btn.click(
353
- fn=lambda t=text, s=speaker: load_example(t, s),
354
- outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
355
- )
356
 
357
- with gr.Accordion("ℹ️ About", open=False):
358
  gr.Markdown(f"""
359
  ## About MALIBA-AI Bambara TTS
360
 
361
  - **🎯 Purpose**: First open-source Text-to-Speech system for Bambara language
362
  - **πŸ—£οΈ Speakers**: {len(SPEAKER_NAMES)} different authentic voices
363
  - **πŸ”Š Quality**: 16kHz neural speech synthesis
364
- - **⚑ Performance**: Optimized for real-time generation
365
  - **πŸ“± Usage**: Educational, accessibility, and cultural preservation
366
 
367
- ### 🎭 Speaker Characteristics:
368
-
369
- - **Bourama**: Most stable and accurate (recommended)
370
- - **Adama**: Natural conversational tone
371
- - **Moussa**: Clear pronunciation for educational content
372
- - **Modibo**: Expressive delivery for storytelling
373
- - **Seydou**: Balanced characteristics for general use
374
- - **Amadou**: Warm and friendly voice
375
- - **Bakary**: Deep, authoritative tone
376
- - **Ngolo**: Youthful and energetic
377
- - **Ibrahima**: Calm and measured delivery
378
- - **Amara**: Melodic and smooth
379
-
380
- **Model Architecture**: Built on state-of-the-art neural TTS with Bambara-specific optimizations
381
 
382
  **License**: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)
383
 
@@ -386,7 +320,6 @@ def build_interface():
386
  **MALIBA-AI Mission**: Ensuring no Malian is left behind by technological advances πŸ‡²πŸ‡±
387
  """)
388
 
389
- # Event handlers
390
  def toggle_advanced(use_adv):
391
  return gr.Group(visible=use_adv)
392
 
@@ -396,7 +329,6 @@ def build_interface():
396
  outputs=[advanced_group]
397
  )
398
 
399
- # Generate speech on button click
400
  generate_btn.click(
401
  fn=generate_speech,
402
  inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
@@ -404,7 +336,6 @@ def build_interface():
404
  show_progress=True
405
  )
406
 
407
- # Generate speech on Enter key
408
  text_input.submit(
409
  fn=generate_speech,
410
  inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
@@ -416,20 +347,17 @@ def build_interface():
416
 
417
def main():
    """Build the Gradio UI and serve it on port 7860 on all interfaces."""
    logger.info("Starting MALIBA-AI Bambara TTS Gradio interface...")

    # Server settings are gathered in one place and handed to launch().
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    app = build_interface()
    app.launch(**launch_options)

    logger.info("Gradio interface launched successfully!")
433
 
434
  if __name__ == "__main__":
435
  main()
 
1
  import os
2
 
 
3
  os.environ["TORCHDYNAMO_DISABLE"] = "1"
4
  os.environ["TORCH_COMPILE_DISABLE"] = "1"
5
  os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
6
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
7
 
 
 
 
 
 
8
  import torch
9
  import gradio as gr
10
  import numpy as np
11
  import spaces
12
  import logging
13
  from huggingface_hub import login
14
+ import threading
15
  import time
16
 
17
  torch._dynamo.config.disable = True
 
24
  if hf_token:
25
  login(token=hf_token)
26
 
27
+ # Global variables for model caching (like your old working version)
28
+ _tts_model = None
29
+ _speakers_dict = None
30
+ _model_initialized = False
31
+ _initialization_in_progress = False
 
 
 
 
 
 
 
 
32
 
33
  def get_speakers_dict():
34
+ """Get speakers dictionary using the correct import structure"""
35
  try:
36
+ # Try new structure first
37
  from maliba_ai.config.settings import Speakers
38
  return {
39
  "Adama": Speakers.Adama,
 
48
  "Amara": Speakers.Amara
49
  }
50
  except Exception as e:
51
+ logger.error(f"Failed to import from settings: {e}")
52
+ # Fallback to old structure (like your working version)
53
  try:
54
+ from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
55
  return {
56
+ "Adama": Adame,
57
+ "Moussa": Moussa,
58
+ "Bourama": Bourama,
59
+ "Modibo": Modibo,
60
+ "Seydou": Seydou
61
  }
62
+ except Exception as e2:
63
+ logger.error(f"Failed to import speakers: {e2}")
64
  return {}
65
 
66
# Serializes first-time model construction so that concurrent requests cannot
# each build their own copy of the model.
_model_init_lock = threading.Lock()


@spaces.GPU()
def initialize_model_once():
    """Load the TTS model and speaker table once and cache them in module globals.

    The first caller builds ``BambaraTTSInference`` and the speakers dictionary;
    later callers get the cached pair. Callers that arrive while a load is
    running block on a lock until it finishes, then reuse its result (or retry
    the load themselves if it failed).

    Returns:
        tuple: ``(_tts_model, _speakers_dict)``.

    Raises:
        ValueError: if ``get_speakers_dict()`` returns an empty mapping.
        Exception: anything raised while importing or constructing the model
            is logged with its traceback and re-raised unchanged.
    """
    # NOTE(review): generate_speech is also wrapped in @spaces.GPU() and calls
    # this function - confirm that nested GPU-decorated calls and caching in
    # module globals behave as intended on ZeroGPU.
    global _tts_model, _speakers_dict, _model_initialized, _initialization_in_progress

    # Fast path: once the cache is populated no locking is needed.
    if _model_initialized:
        logger.info("Model already initialized, returning existing instance")
        return _tts_model, _speakers_dict

    if _initialization_in_progress:
        logger.info("Initialization already in progress, waiting...")

    with _model_init_lock:
        # Another caller may have finished the load while we waited for the lock.
        if _model_initialized:
            return _tts_model, _speakers_dict

        _initialization_in_progress = True
        try:
            logger.info("Initializing Bambara TTS model...")
            start_time = time.time()

            # Imported lazily so the package is only loaded on first use.
            from maliba_ai.tts import BambaraTTSInference

            model = BambaraTTSInference()
            speakers = get_speakers_dict()

            if not speakers:
                raise ValueError("Failed to load speakers dictionary")

            # Publish the results only after everything has succeeded, so the
            # fast path never observes a half-initialized cache.
            _tts_model = model
            _speakers_dict = speakers
            _model_initialized = True

            elapsed = time.time() - start_time
            logger.info(f"Model initialized successfully in {elapsed:.2f} seconds!")

            return _tts_model, _speakers_dict

        except Exception as e:
            logger.error(f"Failed to initialize model: {e}", exc_info=True)
            raise
        finally:
            # Reset on success and failure alike; a failed load can be retried.
            _initialization_in_progress = False
 
 
 
 
 
112
 
113
  def validate_inputs(text, temperature, top_k, top_p, max_tokens):
114
+ """Same validation as your old version"""
115
  if not text or not text.strip():
116
  return False, "Please enter some Bambara text."
117
 
 
124
  if not (0.1 <= top_p <= 1.0):
125
  return False, "Top-P must be between 0.1 and 1.0"
126
 
 
 
 
127
  return True, ""
128
 
129
  @spaces.GPU()
130
  def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p, max_tokens):
131
+ """Generate speech - exactly like your old working version"""
 
 
132
  if not text.strip():
133
  return None, "Please enter some Bambara text."
134
 
135
  try:
136
+ tts, speakers = initialize_model_once()
 
 
 
 
 
137
 
138
+ if not tts or not speakers:
139
+ return None, "❌ Model not properly initialized"
140
 
141
+ if speaker_name not in speakers:
142
+ available_speakers = list(speakers.keys())
143
  return None, f"❌ Speaker '{speaker_name}' not found. Available: {available_speakers}"
144
 
145
+ speaker = speakers[speaker_name]
146
+ logger.info(f"Using speaker: {speaker_name}")
147
 
 
148
  if use_advanced:
149
  is_valid, error_msg = validate_inputs(text, temperature, top_k, top_p, max_tokens)
150
  if not is_valid:
151
  return None, f"❌ {error_msg}"
152
 
153
+ waveform = tts.generate_speech(
154
  text=text.strip(),
155
  speaker_id=speaker,
156
  temperature=temperature,
 
159
  max_new_audio_tokens=int(max_tokens)
160
  )
161
  else:
162
+ waveform = tts.generate_speech(
 
163
  text=text.strip(),
164
  speaker_id=speaker
165
  )
166
 
167
  if waveform is None or waveform.size == 0:
168
+ return None, "Failed to generate audio. Please try again."
 
 
 
 
 
 
 
 
169
 
170
  sample_rate = 16000
171
  return (sample_rate, waveform), f"βœ… Audio generated successfully for speaker {speaker_name}"
172
 
173
  except Exception as e:
174
+ logger.error(f"Speech generation failed: {e}")
175
  return None, f"❌ Error: {str(e)}"
176
 
177
# Speaker names offered in the UI: whatever get_speakers_dict() provides,
# otherwise the five core voices.
def get_speaker_names():
    """Return the list of selectable speaker names.

    Uses the keys of ``get_speakers_dict()`` when that mapping is non-empty;
    otherwise returns a hard-coded list of five names.
    """
    available = get_speakers_dict()
    return list(available) if available else ["Adama", "Moussa", "Bourama", "Modibo", "Seydou"]


SPEAKER_NAMES = get_speaker_names()
185
 
186
# Example prompts of varying length, each paired with a speaker voice. A voice
# that is not in SPEAKER_NAMES is replaced by one of the core speakers.
# The Bambara text uses the letters ɛ, ɔ and ɲ; they must be stored as real
# UTF-8 characters so the model receives the intended words.
examples = [
    ["Aw ni ce", "Adama"],  # Natural conversational greeting
    ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Bakary" if "Bakary" in SPEAKER_NAMES else "Moussa"],
    ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Moussa"],
    ["I ka kɛnɛ wa?", "Ngolo" if "Ngolo" in SPEAKER_NAMES else "Modibo"],
    ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Bourama"],
    ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Ibrahima" if "Ibrahima" in SPEAKER_NAMES else "Seydou"],
    ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Amara" if "Amara" in SPEAKER_NAMES else "Moussa"],
    ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Modibo"],
    ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Amadou" if "Amadou" in SPEAKER_NAMES else "Modibo"],
]
198
 
199
  def build_interface():
200
+ """Build the Gradio interface - simplified like your old working version"""
201
 
202
+ with gr.Blocks(title="Bambara TTS - MALIBA-AI") as demo:
203
+ gr.Markdown("""
204
+ # 🎀 Bambara Text-to-Speech
 
 
 
 
 
205
 
206
+ **Powered by MALIBA-AI**
207
+
208
+ Convert Bambara text to speech using our state-of-the-art TTS model.
209
+
210
+ **Bambara** is spoken by millions of people in Mali and West Africa.
211
+ """)
 
 
 
 
 
 
212
 
213
  with gr.Row():
214
  with gr.Column(scale=2):
215
  text_input = gr.Textbox(
216
  label="πŸ“ Bambara Text",
217
+ placeholder="Type your Bambara text here...",
218
+ lines=3,
219
+ max_lines=10,
220
  value="I ni ce"
221
  )
222
 
223
  speaker_dropdown = gr.Dropdown(
224
  choices=SPEAKER_NAMES,
225
+ value="Bourama" if "Bourama" in SPEAKER_NAMES else SPEAKER_NAMES[0],
226
  label="πŸ—£οΈ Speaker Voice",
227
+ info=f"Choose from {len(SPEAKER_NAMES)} authentic voices"
228
  )
229
 
230
+ generate_btn = gr.Button("🎡 Generate Speech", variant="primary", size="lg")
 
 
 
 
231
 
232
  with gr.Column(scale=1):
233
  use_advanced = gr.Checkbox(
234
+ label="βš™οΈ Use Advanced Settings",
235
  value=False,
236
+ info="Enable to customize generation parameters"
237
  )
238
 
239
  with gr.Group(visible=False) as advanced_group:
240
+ gr.Markdown("**Advanced Parameters:**")
241
 
242
  temperature = gr.Slider(
243
  minimum=0.1,
 
245
  value=0.8,
246
  step=0.1,
247
  label="Temperature",
248
+ info="Higher = more varied"
249
  )
250
 
251
  top_k = gr.Slider(
 
253
  maximum=100,
254
  value=50,
255
  step=5,
256
+ label="Top-K"
 
257
  )
258
 
259
  top_p = gr.Slider(
 
261
  maximum=1.0,
262
  value=0.9,
263
  step=0.05,
264
+ label="Top-P"
 
265
  )
266
 
267
  max_tokens = gr.Slider(
 
269
  maximum=4096,
270
  value=2048,
271
  step=256,
272
+ label="Max Length"
 
273
  )
274
 
275
  gr.Markdown("### πŸ”Š Generated Audio")
 
277
  audio_output = gr.Audio(
278
  label="Generated Speech",
279
  type="numpy",
280
+ interactive=False
 
281
  )
282
 
283
  status_output = gr.Textbox(
284
  label="Status",
285
  interactive=False,
286
  show_label=False,
287
+ container=False
 
288
  )
289
 
290
+ with gr.Accordion("Try These Examples", open=True):
291
  def load_example(text, speaker):
292
  return text, speaker, False, 0.8, 50, 0.9, 2048
293
 
294
+ gr.Markdown("**Click any example below:**")
295
 
296
+ for i, (text, speaker) in enumerate(examples):
297
+ btn = gr.Button(f"{text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
298
+ btn.click(
299
+ fn=lambda t=text, s=speaker: load_example(t, s),
300
+ outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
301
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
 
303
+ with gr.Accordion("About", open=False):
304
  gr.Markdown(f"""
305
  ## About MALIBA-AI Bambara TTS
306
 
307
  - **🎯 Purpose**: First open-source Text-to-Speech system for Bambara language
308
  - **πŸ—£οΈ Speakers**: {len(SPEAKER_NAMES)} different authentic voices
309
  - **πŸ”Š Quality**: 16kHz neural speech synthesis
310
+ - **⚑ Performance**: Model loads once and stays in memory
311
  - **πŸ“± Usage**: Educational, accessibility, and cultural preservation
312
 
313
+ ### 🎭 Available Speakers:
314
+ {', '.join(SPEAKER_NAMES)}
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
  **License**: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)
317
 
 
320
  **MALIBA-AI Mission**: Ensuring no Malian is left behind by technological advances πŸ‡²πŸ‡±
321
  """)
322
 
 
323
  def toggle_advanced(use_adv):
324
  return gr.Group(visible=use_adv)
325
 
 
329
  outputs=[advanced_group]
330
  )
331
 
 
332
  generate_btn.click(
333
  fn=generate_speech,
334
  inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
 
336
  show_progress=True
337
  )
338
 
 
339
  text_input.submit(
340
  fn=generate_speech,
341
  inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
 
347
 
348
def main():
    """Build the Gradio UI and serve it on port 7860 on all interfaces."""
    logger.info("Starting Bambara TTS Gradio interface.")

    # No model preloading here: the model is built lazily by the first
    # generation request.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    app = build_interface()
    app.launch(**launch_options)

    # NOTE(review): if launch() blocks until the server stops, this message is
    # logged at shutdown rather than right after startup - confirm.
    logger.info("Gradio interface launched successfully.")
361
 
362
  if __name__ == "__main__":
363
  main()