sudoping01 committed on
Commit
043ab85
·
verified ·
1 Parent(s): 1a54035

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -115
app.py CHANGED
@@ -1,17 +1,21 @@
1
  import os
2
 
 
3
  os.environ["TORCHDYNAMO_DISABLE"] = "1"
4
  os.environ["TORCH_COMPILE_DISABLE"] = "1"
5
  os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
6
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
7
 
 
 
 
 
8
  import torch
9
  import gradio as gr
10
  import numpy as np
11
  import spaces
12
  import logging
13
  from huggingface_hub import login
14
- import threading
15
  import time
16
 
17
  torch._dynamo.config.disable = True
@@ -24,75 +28,81 @@ hf_token = os.getenv("HF_TOKEN")
24
  if hf_token:
25
  login(token=hf_token)
26
 
27
-
28
- _tts_model = None
29
- _speakers_dict = None
30
- _model_initialized = False
31
- _initialization_in_progress = False
 
 
 
 
 
32
 
33
  def get_speakers_dict():
34
- """Get speakers dictionary - moved to function to avoid import issues"""
35
  try:
36
- from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
37
  return {
38
- "Adama": Adame,
39
- "Moussa": Moussa,
40
- "Bourama": Bourama,
41
- "Modibo": Modibo,
42
- "Seydou": Seydou
 
 
 
 
 
43
  }
44
  except Exception as e:
45
- logger.error(f"Failed to import speakers: {e}")
46
- return {}
47
-
48
- @spaces.GPU()
49
- def initialize_model_once():
50
- global _tts_model, _speakers_dict, _model_initialized, _initialization_in_progress
51
-
52
- if _model_initialized:
53
- logger.info("Model already initialized, returning existing instance")
54
- return _tts_model, _speakers_dict
55
-
56
- if _initialization_in_progress:
57
- logger.info("Initialization already in progress, waiting...")
 
58
 
59
- for _ in range(50):
60
- time.sleep(0.1)
61
- if _model_initialized:
62
- return _tts_model, _speakers_dict
63
-
64
-
65
- _initialization_in_progress = True
66
-
67
  try:
68
- logger.info("Initializing Bambara TTS model...")
69
  start_time = time.time()
70
 
71
- from maliba_ai.tts.inference import BambaraTTSInference
 
72
 
 
73
  model = BambaraTTSInference()
74
- speakers = get_speakers_dict()
75
-
76
- if not speakers:
77
- raise ValueError("Failed to load speakers dictionary")
78
-
79
- _tts_model = model
80
- _speakers_dict = speakers
81
- _model_initialized = True
82
 
83
  elapsed = time.time() - start_time
84
- logger.info(f"Model initialized successfully in {elapsed:.2f} seconds!")
85
 
86
- return _tts_model, _speakers_dict
87
 
88
  except Exception as e:
89
- logger.error(f"Failed to initialize model: {e}")
90
- _initialization_in_progress = False
91
- raise e
92
- finally:
93
- _initialization_in_progress = False
 
 
 
 
 
 
94
 
95
  def validate_inputs(text, temperature, top_k, top_p, max_tokens):
 
96
  if not text or not text.strip():
97
  return False, "Please enter some Bambara text."
98
 
@@ -105,32 +115,44 @@ def validate_inputs(text, temperature, top_k, top_p, max_tokens):
105
  if not (0.1 <= top_p <= 1.0):
106
  return False, "Top-P must be between 0.1 and 1.0"
107
 
 
 
 
108
  return True, ""
109
 
110
  @spaces.GPU()
111
  def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p, max_tokens):
 
 
 
112
  if not text.strip():
113
  return None, "Please enter some Bambara text."
114
 
115
  try:
116
- tts, speakers = initialize_model_once()
 
 
 
 
 
117
 
118
- if not tts or not speakers:
119
- return None, "❌ Model not properly initialized"
120
 
121
- if speaker_name not in speakers:
122
- available_speakers = list(speakers.keys())
123
  return None, f"❌ Speaker '{speaker_name}' not found. Available: {available_speakers}"
124
 
125
- speaker = speakers[speaker_name]
126
- logger.info(f"Using speaker: {speaker_name}")
127
 
 
128
  if use_advanced:
129
  is_valid, error_msg = validate_inputs(text, temperature, top_k, top_p, max_tokens)
130
  if not is_valid:
131
  return None, f"❌ {error_msg}"
132
 
133
- waveform = tts.generate_speech(
134
  text=text.strip(),
135
  speaker_id=speaker,
136
  temperature=temperature,
@@ -139,76 +161,104 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
139
  max_new_audio_tokens=int(max_tokens)
140
  )
141
  else:
142
- waveform = tts.generate_speech(
 
143
  text=text.strip(),
144
  speaker_id=speaker
145
  )
146
 
147
  if waveform is None or waveform.size == 0:
148
- return None, "Failed to generate audio. Please try again."
 
 
 
 
 
 
 
 
149
 
150
  sample_rate = 16000
151
  return (sample_rate, waveform), f"✅ Audio generated successfully for speaker {speaker_name}"
152
 
153
  except Exception as e:
154
- logger.error(f"Speech generation failed: {e}")
155
  return None, f"❌ Error: {str(e)}"
156
 
157
- SPEAKER_NAMES = ["Adama", "Moussa", "Bourama", "Modibo", "Seydou"]
 
 
158
 
159
  examples = [
160
- ["Aw ni ce", "Adama"],
161
- ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"],
162
- ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Bourama"],
163
- ["I ka kɛnɛ wa?", "Modibo"],
164
- ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Adama"],
165
- ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Seydou"],
166
- ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Moussa"],
167
- ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Bourama"],
168
- ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Modibo"],
 
169
  ]
170
 
171
  def build_interface():
172
  """Build the Gradio interface for Bambara TTS"""
173
 
174
- with gr.Blocks(title="Bambara TTS - EXPERIMENTAL") as demo:
175
- gr.Markdown("""
176
- # 🎤 Bambara Text-to-Speech ⚠️ EXPERIMENTAL
177
-
178
- **Powered by MALIBA-AI**
 
 
 
179
 
180
- Convert Bambara text to speech. This model is currently experimental.
181
-
182
- **Bambara** is spoken by millions of people in Mali and West Africa.
183
- """)
 
 
 
 
 
 
 
 
184
 
185
  with gr.Row():
186
  with gr.Column(scale=2):
187
  text_input = gr.Textbox(
188
  label="📝 Bambara Text",
189
- placeholder="Type your Bambara text here...",
190
- lines=3,
191
- max_lines=10,
192
  value="I ni ce"
193
  )
194
 
195
  speaker_dropdown = gr.Dropdown(
196
  choices=SPEAKER_NAMES,
197
- value="Adama",
198
- label="🗣️ Speaker Voice"
 
199
  )
200
 
201
- generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
 
 
 
 
202
 
203
  with gr.Column(scale=1):
204
  use_advanced = gr.Checkbox(
205
- label="⚙️ Use Advanced Settings",
206
  value=False,
207
- info="Enable to customize generation parameters"
208
  )
209
 
210
  with gr.Group(visible=False) as advanced_group:
211
- gr.Markdown("**Advanced Parameters:**")
212
 
213
  temperature = gr.Slider(
214
  minimum=0.1,
@@ -216,7 +266,7 @@ def build_interface():
216
  value=0.8,
217
  step=0.1,
218
  label="Temperature",
219
- info="Higher = more varied"
220
  )
221
 
222
  top_k = gr.Slider(
@@ -224,7 +274,8 @@ def build_interface():
224
  maximum=100,
225
  value=50,
226
  step=5,
227
- label="Top-K"
 
228
  )
229
 
230
  top_p = gr.Slider(
@@ -232,7 +283,8 @@ def build_interface():
232
  maximum=1.0,
233
  value=0.9,
234
  step=0.05,
235
- label="Top-P"
 
236
  )
237
 
238
  max_tokens = gr.Slider(
@@ -240,7 +292,8 @@ def build_interface():
240
  maximum=4096,
241
  value=2048,
242
  step=256,
243
- label="Max Length"
 
244
  )
245
 
246
  gr.Markdown("### 🔊 Generated Audio")
@@ -248,39 +301,79 @@ def build_interface():
248
  audio_output = gr.Audio(
249
  label="Generated Speech",
250
  type="numpy",
251
- interactive=False
 
252
  )
253
 
254
  status_output = gr.Textbox(
255
  label="Status",
256
  interactive=False,
257
  show_label=False,
258
- container=False
 
259
  )
260
 
261
- with gr.Accordion("Try These Examples", open=True):
262
  def load_example(text, speaker):
263
  return text, speaker, False, 0.8, 50, 0.9, 2048
264
 
265
- gr.Markdown("**Click any example below:**")
266
 
267
- for i, (text, speaker) in enumerate(examples):
268
- btn = gr.Button(f"{text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
269
- btn.click(
270
- fn=lambda t=text, s=speaker: load_example(t, s),
271
- outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
272
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
- with gr.Accordion("About", open=False):
275
- gr.Markdown("""
276
- **⚠️ This is an experimental Bambara TTS model.**
277
- - **Languages**: Bambara (bm)
278
- - **Speakers**: 5 different voice options
279
- - **Sample Rate**: 16kHz
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
- **Model loads once on first request and stays in memory**
 
 
 
 
 
 
282
  """)
283
 
 
284
  def toggle_advanced(use_adv):
285
  return gr.Group(visible=use_adv)
286
 
@@ -290,6 +383,7 @@ def build_interface():
290
  outputs=[advanced_group]
291
  )
292
 
 
293
  generate_btn.click(
294
  fn=generate_speech,
295
  inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
@@ -297,6 +391,7 @@ def build_interface():
297
  show_progress=True
298
  )
299
 
 
300
  text_input.submit(
301
  fn=generate_speech,
302
  inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
@@ -308,17 +403,20 @@ def build_interface():
308
 
309
  def main():
310
  """Main function to launch the Gradio interface"""
311
- logger.info("Starting Bambara TTS Gradio interface.")
312
 
313
- # DO NOT preload - let it initialize on first request only
314
  interface = build_interface()
 
 
315
  interface.launch(
316
  server_name="0.0.0.0",
317
  server_port=7860,
318
- share=False
 
319
  )
320
 
321
- logger.info("Gradio interface launched successfully.")
322
 
323
  if __name__ == "__main__":
324
  main()
 
1
  import os
2
 
3
+ # Set environment variables BEFORE any imports
4
  os.environ["TORCHDYNAMO_DISABLE"] = "1"
5
  os.environ["TORCH_COMPILE_DISABLE"] = "1"
6
  os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
 
9
+ # Set CUDA environment to help with unsloth GPU detection
10
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Force GPU visibility
11
+ os.environ["FORCE_CUDA"] = "1" # Force CUDA usage
12
+
13
  import torch
14
  import gradio as gr
15
  import numpy as np
16
  import spaces
17
  import logging
18
  from huggingface_hub import login
 
19
  import time
20
 
21
  torch._dynamo.config.disable = True
 
28
  if hf_token:
29
  login(token=hf_token)
30
 
31
+ # Check GPU availability
32
+ if torch.cuda.is_available():
33
+ device = "cuda"
34
+ logger.info("Using CUDA for inference.")
35
+ elif torch.backends.mps.is_available():
36
+ device = "mps"
37
+ logger.info("Using MPS for inference.")
38
+ else:
39
+ device = "cpu"
40
+ logger.info("Using CPU for inference.")
41
 
def get_speakers_dict():
    """Return the mapping of speaker display names to speaker objects.

    Speakers are looked up as attributes of
    ``maliba_ai.config.settings.Speakers``. The full ten-voice set is tried
    first; if the import fails or any attribute is missing, the five core
    voices are tried on their own.

    Returns:
        dict: display name -> ``Speakers`` attribute, in a fixed order.
        Empty when neither set could be loaded. Failures are logged and
        never raised to the caller.
    """
    core_names = ("Adama", "Moussa", "Bourama", "Modibo", "Seydou")
    extended_names = core_names + (
        "Amadou",
        "Bakary",
        "Ngolo",
        "Ibrahima",
        "Amara",
    )

    try:
        from maliba_ai.config.settings import Speakers
        return {name: getattr(Speakers, name) for name in extended_names}
    except Exception as e:
        logger.error(f"Failed to import all speakers: {e}")

    # Fallback to core speakers only. Catch Exception rather than using a
    # bare except so KeyboardInterrupt/SystemExit still propagate, and keep
    # the error detail in the log instead of discarding it.
    try:
        from maliba_ai.config.settings import Speakers
        return {name: getattr(Speakers, name) for name in core_names}
    except Exception as e:
        logger.error(f"Failed to import even core speakers: {e}")
        return {}
73
 
def initialize_tts_model():
    """Try to build the Bambara TTS model once at module import time.

    Returns:
        The ``BambaraTTSInference`` instance on success, or ``None`` when
        construction fails for any reason. Callers treat ``None`` as
        "initialize lazily on the first request".
    """
    try:
        logger.info("Initializing Bambara TTS model globally...")
        # perf_counter is monotonic, so the reported duration cannot be
        # skewed by wall-clock adjustments during a long model load.
        start_time = time.perf_counter()

        # Imported here so a missing/broken package is handled by the
        # except below instead of aborting the whole module import.
        from maliba_ai.tts import BambaraTTSInference

        model = BambaraTTSInference()

        elapsed = time.perf_counter() - start_time
        logger.info(f"TTS Model initialized successfully in {elapsed:.2f} seconds!")

        return model

    except Exception as e:
        # exc_info=True keeps the traceback, matching how generate_speech
        # logs its own failures.
        logger.error(f"Failed to initialize TTS model: {e}", exc_info=True)
        logger.info("Model will be initialized on first request instead")
        return None
95
+
96
+ # Initialize speakers dictionary
97
+ speakers_dict = get_speakers_dict()
98
+ logger.info(f"Available speakers: {list(speakers_dict.keys())}")
99
+
100
+ # Try to initialize model globally (like ASR space)
101
+ # If it fails due to GPU detection, it will be None and we'll init on first request
102
+ tts_model = initialize_tts_model()
103
 
104
  def validate_inputs(text, temperature, top_k, top_p, max_tokens):
105
+ """Validate user inputs"""
106
  if not text or not text.strip():
107
  return False, "Please enter some Bambara text."
108
 
 
115
  if not (0.1 <= top_p <= 1.0):
116
  return False, "Top-P must be between 0.1 and 1.0"
117
 
118
+ if len(text.strip()) > 1000:
119
+ return False, "Text is too long. Please use shorter text (max 1000 characters)."
120
+
121
  return True, ""
122
 
123
  @spaces.GPU()
124
  def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p, max_tokens):
125
+ """Generate speech - with fallback initialization if global init failed"""
126
+ global tts_model
127
+
128
  if not text.strip():
129
  return None, "Please enter some Bambara text."
130
 
131
  try:
132
+ # If global initialization failed, try to initialize here with GPU decorator
133
+ if tts_model is None:
134
+ logger.info("Global model initialization failed, initializing with GPU decorator...")
135
+ from maliba_ai.tts import BambaraTTSInference
136
+ tts_model = BambaraTTSInference()
137
+ logger.info("Model initialized successfully with GPU decorator!")
138
 
139
+ if not speakers_dict:
140
+ return None, "❌ Speakers not properly loaded"
141
 
142
+ if speaker_name not in speakers_dict:
143
+ available_speakers = list(speakers_dict.keys())
144
  return None, f"❌ Speaker '{speaker_name}' not found. Available: {available_speakers}"
145
 
146
+ speaker = speakers_dict[speaker_name]
147
+ logger.info(f"Generating speech with speaker: {speaker_name}")
148
 
149
+ # Validate inputs if using advanced settings
150
  if use_advanced:
151
  is_valid, error_msg = validate_inputs(text, temperature, top_k, top_p, max_tokens)
152
  if not is_valid:
153
  return None, f"❌ {error_msg}"
154
 
155
+ waveform = tts_model.generate_speech(
156
  text=text.strip(),
157
  speaker_id=speaker,
158
  temperature=temperature,
 
161
  max_new_audio_tokens=int(max_tokens)
162
  )
163
  else:
164
+ # Use default settings
165
+ waveform = tts_model.generate_speech(
166
  text=text.strip(),
167
  speaker_id=speaker
168
  )
169
 
170
  if waveform is None or waveform.size == 0:
171
+ return None, "❌ Failed to generate audio. Please try again with different text."
172
+
173
+ # Ensure waveform is in correct format
174
+ if isinstance(waveform, torch.Tensor):
175
+ waveform = waveform.cpu().numpy()
176
+
177
+ # Normalize audio to prevent clipping
178
+ if np.max(np.abs(waveform)) > 0:
179
+ waveform = waveform / np.max(np.abs(waveform)) * 0.9
180
 
181
  sample_rate = 16000
182
  return (sample_rate, waveform), f"✅ Audio generated successfully for speaker {speaker_name}"
183
 
184
  except Exception as e:
185
+ logger.error(f"Speech generation failed: {e}", exc_info=True)
186
  return None, f"❌ Error: {str(e)}"
187
 
188
+ # Get available speakers for dropdown
189
+ SPEAKER_NAMES = list(speakers_dict.keys()) if speakers_dict else ["Adama", "Moussa", "Bourama", "Modibo", "Seydou"]
190
+
191
 
# Example prompts offered in the UI as [Bambara text, speaker name] pairs.
# The texts use the Bambara letters ɛ (open e), ɔ (open o) and ɲ (ny); they
# must be stored as real Unicode characters, not as mis-decoded byte
# sequences, or the model is fed text that is not Bambara.
examples = [
    ["Aw ni ce", "Adama"],
    ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Bakary"],
    ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Moussa"],
    ["I ka kɛnɛ wa?", "Ngolo"],
    ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Bourama"],
    ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Ibrahima"],
    ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Amara"],
    ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Modibo"],
    ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Amadou"],
    ["Bamanankan ye kan ɲuman ye", "Seydou"],
]
204
 
205
  def build_interface():
206
  """Build the Gradio interface for Bambara TTS"""
207
 
208
+ with gr.Blocks(
209
+ title="Bambara TTS - MALIBA-AI",
210
+ theme=gr.themes.Soft(),
211
+ css="""
212
+ .main-header { text-align: center; margin-bottom: 2rem; }
213
+ .status-box { margin-top: 1rem; }
214
+ """
215
+ ) as demo:
216
 
217
+ with gr.Row():
218
+ gr.Markdown(f"""
219
+ # 🎤 Bambara Text-to-Speech
220
+
221
+ **Powered by MALIBA-AI** | *First Open-Source Bambara TTS*
222
+
223
+ Convert Bambara text to natural-sounding speech using our state-of-the-art neural TTS system.
224
+
225
+ **Bambara** is spoken by millions of people in Mali and West Africa 🌍
226
+
227
+ **Status**: {'✅ Model loaded' if tts_model is not None else '⏳ Model will load on first request'}
228
+ """, elem_classes=["main-header"])
229
 
230
  with gr.Row():
231
  with gr.Column(scale=2):
232
  text_input = gr.Textbox(
233
  label="📝 Bambara Text",
234
+ placeholder="I ni ce... (Type your Bambara text here)",
235
+ lines=4,
236
+ max_lines=8,
237
  value="I ni ce"
238
  )
239
 
240
  speaker_dropdown = gr.Dropdown(
241
  choices=SPEAKER_NAMES,
242
+ value=SPEAKER_NAMES[0] if SPEAKER_NAMES else "Bourama", # Default to most stable speaker
243
+ label="🗣️ Speaker Voice",
244
+ info=f"Choose from {len(SPEAKER_NAMES)} authentic voices (Bourama recommended for best quality)"
245
  )
246
 
247
+ generate_btn = gr.Button(
248
+ "🎵 Generate Speech",
249
+ variant="primary",
250
+ size="lg"
251
+ )
252
 
253
  with gr.Column(scale=1):
254
  use_advanced = gr.Checkbox(
255
+ label="⚙️ Advanced Settings",
256
  value=False,
257
+ info="Customize generation parameters"
258
  )
259
 
260
  with gr.Group(visible=False) as advanced_group:
261
+ gr.Markdown("**🔧 Advanced Parameters:**")
262
 
263
  temperature = gr.Slider(
264
  minimum=0.1,
 
266
  value=0.8,
267
  step=0.1,
268
  label="Temperature",
269
+ info="Higher = more varied speech"
270
  )
271
 
272
  top_k = gr.Slider(
 
274
  maximum=100,
275
  value=50,
276
  step=5,
277
+ label="Top-K",
278
+ info="Vocabulary selection size"
279
  )
280
 
281
  top_p = gr.Slider(
 
283
  maximum=1.0,
284
  value=0.9,
285
  step=0.05,
286
+ label="Top-P",
287
+ info="Nucleus sampling threshold"
288
  )
289
 
290
  max_tokens = gr.Slider(
 
292
  maximum=4096,
293
  value=2048,
294
  step=256,
295
+ label="Max Audio Length",
296
+ info="Maximum audio duration"
297
  )
298
 
299
  gr.Markdown("### 🔊 Generated Audio")
 
301
  audio_output = gr.Audio(
302
  label="Generated Speech",
303
  type="numpy",
304
+ interactive=False,
305
+ show_download_button=True
306
  )
307
 
308
  status_output = gr.Textbox(
309
  label="Status",
310
  interactive=False,
311
  show_label=False,
312
+ container=False,
313
+ elem_classes=["status-box"]
314
  )
315
 
316
+ with gr.Accordion("📚 Try These Examples", open=True):
317
  def load_example(text, speaker):
318
  return text, speaker, False, 0.8, 50, 0.9, 2048
319
 
320
+ gr.Markdown("**Click any example below to try it:**")
321
 
322
+ with gr.Row():
323
+ for i, (text, speaker) in enumerate(examples[:5]):
324
+ btn = gr.Button(
325
+ f"🔹 {text[:25]}{'...' if len(text) > 25 else ''}",
326
+ size="sm"
327
+ )
328
+ btn.click(
329
+ fn=lambda t=text, s=speaker: load_example(t, s),
330
+ outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
331
+ )
332
+
333
+ with gr.Row():
334
+ for i, (text, speaker) in enumerate(examples[5:]):
335
+ btn = gr.Button(
336
+ f"🔹 {text[:25]}{'...' if len(text) > 25 else ''}",
337
+ size="sm"
338
+ )
339
+ btn.click(
340
+ fn=lambda t=text, s=speaker: load_example(t, s),
341
+ outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
342
+ )
343
 
344
+ with gr.Accordion("ℹ️ About", open=False):
345
+ gr.Markdown(f"""
346
+ ## About MALIBA-AI Bambara TTS
347
+
348
+ - **🎯 Purpose**: First open-source Text-to-Speech system for Bambara language
349
+ - **🗣️ Speakers**: {len(SPEAKER_NAMES)} different authentic voices
350
+ - **🔊 Quality**: 16kHz neural speech synthesis
351
+ - **⚡ Performance**: Optimized for real-time generation
352
+ - **📱 Usage**: Educational, accessibility, and cultural preservation
353
+
354
+ ### 🎭 Speaker Characteristics:
355
+
356
+ - **Bourama**: Most stable and accurate (recommended)
357
+ - **Adama**: Natural conversational tone
358
+ - **Moussa**: Clear pronunciation for educational content
359
+ - **Modibo**: Expressive delivery for storytelling
360
+ - **Seydou**: Balanced characteristics for general use
361
+ - **Amadou**: Warm and friendly voice
362
+ - **Bakary**: Deep, authoritative tone
363
+ - **Ngolo**: Youthful and energetic
364
+ - **Ibrahima**: Calm and measured delivery
365
+ - **Amara**: Melodic and smooth
366
 
367
+ **Model Architecture**: Built on state-of-the-art neural TTS with Bambara-specific optimizations
368
+
369
+ **License**: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 (CC BY-NC-SA 4.0)
370
+
371
+ ---
372
+
373
+ **MALIBA-AI Mission**: Ensuring no Malian is left behind by technological advances 🇲🇱
374
  """)
375
 
376
+ # Event handlers
377
  def toggle_advanced(use_adv):
378
  return gr.Group(visible=use_adv)
379
 
 
383
  outputs=[advanced_group]
384
  )
385
 
386
+ # Generate speech on button click
387
  generate_btn.click(
388
  fn=generate_speech,
389
  inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
 
391
  show_progress=True
392
  )
393
 
394
+ # Generate speech on Enter key
395
  text_input.submit(
396
  fn=generate_speech,
397
  inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
 
403
 
def main():
    """Build the Gradio interface and serve it on 0.0.0.0:7860.

    Returns once the server has stopped.
    """
    logger.info("Starting MALIBA-AI Bambara TTS Gradio interface...")

    # Build interface
    interface = build_interface()

    # NOTE(review): when run as a script, launch() blocks the main thread by
    # default (prevent_thread_lock=False), so anything placed after it only
    # executes at shutdown. The "launching" message therefore goes before the
    # call; confirm against the installed Gradio version.
    logger.info("Launching Gradio interface on 0.0.0.0:7860")
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )

    logger.info("Gradio interface stopped.")
420
 
421
  if __name__ == "__main__":
422
  main()