sudoping01 committed on
Commit
4925aa8
·
verified ·
1 Parent(s): d42b66c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -37
app.py CHANGED
@@ -1,12 +1,12 @@
1
  import os
2
 
3
- # Disable problematic optimizations for ZeroGPU compatibility
4
  os.environ["TORCHDYNAMO_DISABLE"] = "1"
5
  os.environ["TORCH_COMPILE_DISABLE"] = "1"
6
  os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
 
9
- # Check if we're in ZeroGPU or similar restricted environment
10
  def is_restricted_environment():
11
  return (
12
  os.getenv("ZERO_GPU") or
@@ -15,7 +15,7 @@ def is_restricted_environment():
15
  "spaces" in str(os.getenv("HOSTNAME", "")).lower()
16
  )
17
 
18
- # Disable Unsloth optimizations in restricted environments
19
  if is_restricted_environment():
20
  os.environ["UNSLOTH_DISABLE"] = "1"
21
  os.environ["DISABLE_UNSLOTH"] = "1"
@@ -44,7 +44,7 @@ hf_token = os.getenv("HF_TOKEN")
44
  if hf_token:
45
  login(token=hf_token)
46
 
47
- # Global variables for model caching
48
  _tts_model = None
49
  _speakers_dict = None
50
  _model_initialized = False
@@ -53,10 +53,7 @@ _initialization_in_progress = False
53
  def get_speakers_dict():
54
  """Get speakers dictionary using the correct SDK structure"""
55
  try:
56
- # Import the Speakers class (not individual speakers)
57
  from maliba_ai.config.settings import Speakers
58
-
59
- # Access all 10 speakers through the Speakers class
60
  speakers_dict = {
61
  "Adama": Speakers.Adama,
62
  "Moussa": Speakers.Moussa,
@@ -190,11 +187,9 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
190
  if waveform is None or waveform.size == 0:
191
  return None, "Failed to generate audio. Please try again."
192
 
193
- # Convert to numpy if it's a tensor
194
  if isinstance(waveform, torch.Tensor):
195
  waveform = waveform.cpu().numpy()
196
 
197
- # Ensure proper audio format (convert float32 to int16 range but keep as float for Gradio)
198
  if waveform.dtype == np.float32:
199
  # Normalize to [-1, 1] range if needed
200
  if np.max(np.abs(waveform)) > 1.0:
@@ -209,22 +204,20 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
209
  logger.error(f"Speech generation failed: {e}")
210
  return None, f"❌ Error: {str(e)}"
211
 
212
- # Use available speakers (detect what's actually available, prioritize Bourama)
213
  def get_speaker_names():
214
  speakers = get_speakers_dict()
215
  if speakers:
216
  speaker_list = list(speakers.keys())
217
- # Reorder to match preferred order (Bourama first)
218
  preferred_order = ["Bourama", "Adama", "Moussa", "Modibo", "Seydou",
219
  "Amadou", "Bakary", "Ngolo", "Ibrahima", "Amara"]
220
 
221
- # Sort available speakers according to preferred order
222
  ordered_speakers = []
223
  for speaker in preferred_order:
224
  if speaker in speaker_list:
225
  ordered_speakers.append(speaker)
226
 
227
- # Add any remaining speakers not in preferred list
228
  for speaker in speaker_list:
229
  if speaker not in ordered_speakers:
230
  ordered_speakers.append(speaker)
@@ -232,51 +225,48 @@ def get_speaker_names():
232
  logger.info(f"Available speakers: {ordered_speakers}")
233
  return ordered_speakers
234
  else:
235
- # Final fallback with Bourama first
236
  logger.warning("No speakers loaded, using fallback list")
237
  return ["Bourama", "Adama", "Moussa", "Modibo", "Seydou"]
238
 
239
  SPEAKER_NAMES = get_speaker_names()
240
 
241
- # Examples representing ALL 10 speakers - with fallbacks for missing speakers
242
  examples = [
243
- ["Aw ni ce", "Adama"], # Natural conversational greeting
244
- ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"], # Clear pronunciation for informative content
245
- ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Bourama"], # Most stable for educational content
246
- ["I ka kɛnɛ wa?", "Modibo"], # Expressive delivery for questions
247
- ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Adama"], # Natural conversational tone for longer explanation
248
- ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Seydou"], # Balanced characteristics for formal content
249
- ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Moussa"], # Clear pronunciation for heartfelt long message
250
- ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Bourama"], # Most stable for complex statement
251
- ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Modibo"], # Expressive delivery for personal greeting
252
- ["To tɔ nantan ni lafiya, o ka fisa ni so fa dumuniba kɛlɛma ye.", "Amadou"], # Warm and friendly voice for wisdom saying
253
- ["Mali ye jamana ɲuman ye!", "Bakary"], # Deep, authoritative tone for patriotic statement
254
- ["An ka ɲɔgɔn dɛmɛ ka baara kɛ ɲɔgɔn fɛ", "Ngolo"], # Youthful and energetic for collaboration
255
- ["Hakili to yɔrɔ min na, sabali bɛ yen", "Ibrahima"], # Calm and measured for philosophical thought
256
- ["Dɔnko ɲuman ye, a bɛ dɔn mɔgɔ kɔnɔ", "Amara"], # Melodic and smooth for poetic expression
257
  ]
258
 
259
  def get_safe_examples():
260
  """Get examples with speaker fallbacks for missing speakers"""
261
  safe_examples = []
262
 
263
- # Fallback mapping for missing speakers
264
  fallback_speakers = {
265
- "Amadou": "Adama", # Warm -> Natural conversational
266
- "Bakary": "Modibo", # Authoritative -> Expressive
267
- "Ngolo": "Adama", # Youthful -> Natural conversational
268
- "Ibrahima": "Seydou", # Calm -> Balanced
269
- "Amara": "Moussa" # Melodic -> Clear pronunciation
270
  }
271
 
272
  for text, speaker in examples:
273
- # Use original speaker if available, otherwise use fallback
274
  if speaker in SPEAKER_NAMES:
275
  safe_examples.append([text, speaker])
276
  elif speaker in fallback_speakers and fallback_speakers[speaker] in SPEAKER_NAMES:
277
  safe_examples.append([text, fallback_speakers[speaker]])
278
  else:
279
- # Final fallback to first available speaker
280
  safe_examples.append([text, SPEAKER_NAMES[0]])
281
 
282
  return safe_examples
 
1
  import os
2
 
3
+
4
  os.environ["TORCHDYNAMO_DISABLE"] = "1"
5
  os.environ["TORCH_COMPILE_DISABLE"] = "1"
6
  os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
 
9
+
10
  def is_restricted_environment():
11
  return (
12
  os.getenv("ZERO_GPU") or
 
15
  "spaces" in str(os.getenv("HOSTNAME", "")).lower()
16
  )
17
 
18
+
19
  if is_restricted_environment():
20
  os.environ["UNSLOTH_DISABLE"] = "1"
21
  os.environ["DISABLE_UNSLOTH"] = "1"
 
44
  if hf_token:
45
  login(token=hf_token)
46
 
47
+
48
  _tts_model = None
49
  _speakers_dict = None
50
  _model_initialized = False
 
53
  def get_speakers_dict():
54
  """Get speakers dictionary using the correct SDK structure"""
55
  try:
 
56
  from maliba_ai.config.settings import Speakers
 
 
57
  speakers_dict = {
58
  "Adama": Speakers.Adama,
59
  "Moussa": Speakers.Moussa,
 
187
  if waveform is None or waveform.size == 0:
188
  return None, "Failed to generate audio. Please try again."
189
 
 
190
  if isinstance(waveform, torch.Tensor):
191
  waveform = waveform.cpu().numpy()
192
 
 
193
  if waveform.dtype == np.float32:
194
  # Normalize to [-1, 1] range if needed
195
  if np.max(np.abs(waveform)) > 1.0:
 
204
  logger.error(f"Speech generation failed: {e}")
205
  return None, f"❌ Error: {str(e)}"
206
 
207
+
208
  def get_speaker_names():
209
  speakers = get_speakers_dict()
210
  if speakers:
211
  speaker_list = list(speakers.keys())
212
+
213
  preferred_order = ["Bourama", "Adama", "Moussa", "Modibo", "Seydou",
214
  "Amadou", "Bakary", "Ngolo", "Ibrahima", "Amara"]
215
 
 
216
  ordered_speakers = []
217
  for speaker in preferred_order:
218
  if speaker in speaker_list:
219
  ordered_speakers.append(speaker)
220
 
 
221
  for speaker in speaker_list:
222
  if speaker not in ordered_speakers:
223
  ordered_speakers.append(speaker)
 
225
  logger.info(f"Available speakers: {ordered_speakers}")
226
  return ordered_speakers
227
  else:
228
+
229
  logger.warning("No speakers loaded, using fallback list")
230
  return ["Bourama", "Adama", "Moussa", "Modibo", "Seydou"]
231
 
232
  SPEAKER_NAMES = get_speaker_names()
233
 
234
+
235
  examples = [
236
+ ["Aw ni ce", "Adama"],
237
+ ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"],
238
+ ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Bourama"],
239
+ ["I ka kɛnɛ wa?", "Modibo"],
240
+ ["Lakɔli karamɔgɔw tun tɛ ka se ka sɛbɛnni kɛ ka ɲɛ walanda kan wa denmisɛnw tun tɛ ka se ka o sɛbɛnni ninnu ye, kuma tɛ ka u kalan. Denmisɛnw kɛra kunfinw ye.", "Adama"],
241
+ ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ.", "Seydou"],
242
+ ["Aw ni ce. Ne tɔgɔ ye Adama. Awɔ, ne ye maliden de ye. Aw Sanbɛ Sanbɛ. San min tɛ ɲinan ye, an bɛɛ ka jɛ ka o seli ɲɔgɔn fɛ, hɛɛrɛ ni lafiya la. Ala ka Mali suma. Ala ka Mali yiriwa. Ala ka Mali taa ɲɛ. Ala ka an ka seliw caya. Ala ka yafa an bɛɛ ma.", "Moussa"],
243
+ ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Bourama"],
244
+ ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Modibo"],
245
+ ["To tɔ nantan ni lafiya, o ka fisa ni so fa dumuniba kɛlɛma ye.", "Amadou"],
246
+ ["Mali ye jamana ɲuman ye!", "Bakary"],
247
+ ["An ka ɲɔgɔn dɛmɛ ka baara kɛ ɲɔgɔn fɛ", "Ngolo"],
248
+ ["Hakili to yɔrɔ min na, sabali bɛ yen", "Ibrahima"],
249
+ ["Dɔnko ɲuman ye, a bɛ dɔn mɔgɔ kɔnɔ", "Amara"],
250
  ]
251
 
252
  def get_safe_examples():
253
  """Get examples with speaker fallbacks for missing speakers"""
254
  safe_examples = []
255
 
 
256
  fallback_speakers = {
257
+ "Amadou": "Adama",
258
+ "Bakary": "Modibo",
259
+ "Ngolo": "Adama",
260
+ "Ibrahima": "Seydou",
261
+ "Amara": "Moussa"
262
  }
263
 
264
  for text, speaker in examples:
 
265
  if speaker in SPEAKER_NAMES:
266
  safe_examples.append([text, speaker])
267
  elif speaker in fallback_speakers and fallback_speakers[speaker] in SPEAKER_NAMES:
268
  safe_examples.append([text, fallback_speakers[speaker]])
269
  else:
 
270
  safe_examples.append([text, SPEAKER_NAMES[0]])
271
 
272
  return safe_examples