ceymox committed
Commit f5147dc · verified · 1 Parent(s): 1523ffa

Update app.py

Files changed (1): app.py (+210 -87)
app.py CHANGED
@@ -12,6 +12,10 @@ import gradio as gr
 from transformers import AutoModel, logging as trf_logging
 from huggingface_hub import login, hf_hub_download, scan_cache_dir
 
+# Increase timeout for transformers HTTP requests
+import os
+os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "300"  # 5 minutes timeout
+
 # Enable verbose logging for transformers
 trf_logging.set_verbosity_info()
 
@@ -33,34 +37,73 @@ model = None
 # Define the repository ID
 repo_id = "ai4bharat/IndicF5"
 
-# Improved model loading with error handling
-try:
-    print(f"Loading {repo_id} model...")
-    # Try direct loading first
-    model = AutoModel.from_pretrained(
-        repo_id,
-        trust_remote_code=True,
-        revision="main"
-    ).to(device)
-    print(f"Model loaded successfully! Type: {type(model)}")
-
-    # Check model attributes
-    model_methods = [method for method in dir(model) if not method.startswith('_') and callable(getattr(model, method))]
-    print(f"Available model methods: {model_methods[:10]}...")
-
-except Exception as e:
-    print(f"⚠️ Error loading model directly: {e}")
+# Improved model loading with error handling and cache checking
+def load_model_with_retry(max_retries=3, retry_delay=5):
+    global model
+
+    # First, check if model is already in cache
+    print("Checking if model is in cache...")
+    try:
+        cache_info = scan_cache_dir()
+        model_in_cache = any(repo_id in repo.repo_id for repo in cache_info.repos)
+        if model_in_cache:
+            print(f"Model {repo_id} found in cache, loading locally...")
+            model = AutoModel.from_pretrained(
+                repo_id,
+                trust_remote_code=True,
+                local_files_only=True
+            ).to(device)
+            print("Model loaded from cache successfully!")
+            return
+    except Exception as e:
+        print(f"Cache check failed: {e}")
+
+    # If not in cache or cache check failed, try loading with retries
+    for attempt in range(max_retries):
+        try:
+            print(f"Loading {repo_id} model (attempt {attempt+1}/{max_retries})...")
+            model = AutoModel.from_pretrained(
+                repo_id,
+                trust_remote_code=True,
+                revision="main",
+                use_auth_token=hf_token,  # Use token if available
+                low_cpu_mem_usage=True  # Reduce memory usage
+            ).to(device)
+
+            print(f"Model loaded successfully! Type: {type(model)}")
+
+            # Check model attributes
+            model_methods = [method for method in dir(model) if not method.startswith('_') and callable(getattr(model, method))]
+            print(f"Available model methods: {model_methods[:10]}...")
+
+            return  # Success, exit function
+
+        except Exception as e:
+            print(f"⚠️ Attempt {attempt+1}/{max_retries} failed: {e}")
+            if attempt < max_retries - 1:
+                print(f"Waiting {retry_delay} seconds before retrying...")
+                time.sleep(retry_delay)
+                retry_delay *= 1.5  # Exponential backoff
+
+    # If all attempts failed, try one last time with fallback options
     try:
-        # Try loading with local_files_only if model is cached
+        print("Trying with fallback options...")
         model = AutoModel.from_pretrained(
             repo_id,
             trust_remote_code=True,
-            local_files_only=True
+            revision="main",
+            local_files_only=False,
+            use_auth_token=hf_token,
+            force_download=False,
+            resume_download=True
         ).to(device)
-        print("Model loaded from cache!")
+        print("Model loaded with fallback options!")
     except Exception as e2:
         print(f"❌ All attempts to load model failed: {e2}")
+        print("Will continue without model loaded.")
+
+# Call the improved loading function
+load_model_with_retry()
 
 # Advanced audio processing functions
 def remove_noise(audio_data, threshold=0.01):
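Note: load_model_with_retry interleaves three concerns: a cache probe via scan_cache_dir, a retry loop with multiplicative backoff, and a final fallback load. A minimal standalone sketch of the first two patterns, with is_cached and with_retries as illustrative names that do not appear in the commit:

    import time
    from huggingface_hub import scan_cache_dir

    def is_cached(repo_id: str) -> bool:
        # Mirrors the hunk's check: any cached repo whose id contains repo_id.
        try:
            return any(repo_id in repo.repo_id for repo in scan_cache_dir().repos)
        except Exception:
            return False  # treat an unreadable cache as a miss

    def with_retries(load_fn, max_retries=3, retry_delay=5):
        # Call load_fn until it succeeds; sleep retry_delay between failures,
        # growing it 1.5x each time, matching the backoff factor in the commit.
        for attempt in range(max_retries):
            try:
                return load_fn()
            except Exception as e:
                print(f"Attempt {attempt + 1}/{max_retries} failed: {e}")
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)
                    retry_delay *= 1.5
        return None

One version caveat: on recent transformers releases use_auth_token is deprecated in favor of token, and resume_download may be ignored by current huggingface_hub, so the fallback call can emit deprecation warnings.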
@@ -147,54 +190,79 @@ def enhance_audio(audio_data):
 
     return audio_data
 
-# Load audio from URL with improved error handling
-def load_audio_from_url(url):
+# Load audio from URL with improved error handling and retries
+def load_audio_from_url(url, max_retries=3):
     print(f"Downloading reference audio from {url}")
-    try:
-        response = requests.get(url)
-        if response.status_code == 200:
-            try:
-                # Save content to a temp file
-                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
-                temp_file.write(response.content)
-                temp_file.close()
-                print(f"Saved reference audio to temp file: {temp_file.name}")
-
-                # Try different methods to read the audio file
-                audio_data = None
-                sample_rate = None
-
-                # Try SoundFile first
-                try:
-                    audio_data, sample_rate = sf.read(temp_file.name)
-                    print(f"Audio loaded with SoundFile: {sample_rate}Hz, {len(audio_data)} samples")
-                except Exception as sf_error:
-                    print(f"SoundFile failed: {sf_error}")
-
-                    # Try librosa as fallback
-                    try:
-                        audio_data, sample_rate = librosa.load(temp_file.name, sr=None)
-                        print(f"Audio loaded with librosa: {sample_rate}Hz, shape={audio_data.shape}")
-                    except Exception as lr_error:
-                        print(f"Librosa also failed: {lr_error}")
-
-                # Clean up temp file
-                os.unlink(temp_file.name)
-
-                if audio_data is not None:
-                    # Apply audio enhancement to the reference
-                    audio_data = enhance_audio(audio_data)
-                    return sample_rate, audio_data
-
-            except Exception as e:
-                print(f"Failed to process audio data: {e}")
-        else:
-            print(f"Failed to download audio: status code {response.status_code}")
-    except Exception as e:
-        print(f"Error downloading audio: {e}")
-
-    # Return default values as fallback
+
+    for attempt in range(max_retries):
+        try:
+            # Use a longer timeout
+            response = requests.get(url, timeout=60)  # 60 second timeout
+
+            if response.status_code == 200:
+                try:
+                    # Save content to a temp file
+                    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+                    temp_file.write(response.content)
+                    temp_file.close()
+                    print(f"Saved reference audio to temp file: {temp_file.name}")
+
+                    # Try different methods to read the audio file
+                    audio_data = None
+                    sample_rate = None
+
+                    # Try SoundFile first
+                    try:
+                        audio_data, sample_rate = sf.read(temp_file.name)
+                        print(f"Audio loaded with SoundFile: {sample_rate}Hz, {len(audio_data)} samples")
+                    except Exception as sf_error:
+                        print(f"SoundFile failed: {sf_error}")
+
+                        # Try librosa as fallback
+                        try:
+                            audio_data, sample_rate = librosa.load(temp_file.name, sr=None)
+                            print(f"Audio loaded with librosa: {sample_rate}Hz, shape={audio_data.shape}")
+                        except Exception as lr_error:
+                            print(f"Librosa also failed: {lr_error}")
+
+                    # Clean up temp file
+                    os.unlink(temp_file.name)
+
+                    if audio_data is not None:
+                        # Apply audio enhancement to the reference
+                        audio_data = enhance_audio(audio_data)
+                        return sample_rate, audio_data
+
+                except Exception as e:
+                    print(f"Failed to process audio data: {e}")
+            else:
+                print(f"Failed to download audio: status code {response.status_code}")
+
+        except requests.exceptions.Timeout:
+            if attempt < max_retries - 1:
+                wait_time = (attempt + 1) * 5  # Exponential backoff
+                print(f"Request timed out. Retrying in {wait_time} seconds...")
+                time.sleep(wait_time)
+            else:
+                print("All retry attempts failed due to timeout.")
+        except Exception as e:
+            print(f"Error downloading audio: {e}")
+            if attempt < max_retries - 1:
+                time.sleep(5)
+
+    # If we reach here, all attempts failed
     print("⚠️ Returning default silence as reference audio")
+
+    # Try to load a local backup audio if provided
+    backup_path = "backup_reference.wav"
+    if os.path.exists(backup_path):
+        try:
+            audio_data, sample_rate = sf.read(backup_path)
+            print(f"Loaded backup reference audio: {sample_rate}Hz")
+            return sample_rate, audio_data
+        except Exception as e:
+            print(f"Failed to load backup audio: {e}")
+
     return 24000, np.zeros(int(24000))  # 1 second of silence at 24kHz
 
 # Split text into chunks for streaming
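Note: the retry comment in this hunk labels (attempt + 1) * 5 "exponential backoff", but the waits grow linearly (5s, 10s, 15s). A compact sketch that separates the transport retries from the audio decoding; fetch_with_backoff is an illustrative helper, not part of the commit:

    import time
    import requests

    def fetch_with_backoff(url, max_retries=3, timeout=60):
        # Return the response body as bytes, or None after max_retries failures.
        for attempt in range(max_retries):
            try:
                response = requests.get(url, timeout=timeout)
                if response.status_code == 200:
                    return response.content
                print(f"HTTP {response.status_code} for {url}")
            except requests.exceptions.Timeout:
                if attempt < max_retries - 1:
                    wait = (attempt + 1) * 5  # linear waits: 5s, 10s, ...
                    print(f"Timed out; retrying in {wait}s")
                    time.sleep(wait)
            except requests.RequestException as e:
                print(f"Request failed: {e}")
                if attempt < max_retries - 1:
                    time.sleep(5)
        return None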
@@ -241,7 +309,7 @@ def split_into_chunks(text, max_length=30):
     print(f"Split text into {len(final_chunks)} chunks")
     return final_chunks
 
-# Improved model wrapper
+# Improved model wrapper with timeout handling
 class ModelWrapper:
     def __init__(self, model):
         self.model = model
@@ -274,9 +342,14 @@ class ModelWrapper:
     def generate(self, text, ref_audio_path, ref_text, **kwargs):
         """Generate speech with improved error handling and preprocessing"""
         print(f"\n==== MODEL INFERENCE ====")
-        print(f"Text input: '{text}'")
+        print(f"Text to generate: '{text}'")  # Make sure this is the text we want to generate
         print(f"Reference audio path: {ref_audio_path}")
 
+        # Check if model is loaded
+        if self.model is None:
+            print("⚠️ Model is not loaded. Cannot generate speech.")
+            return np.zeros(int(24000))  # Return silence
+
         # Check if files exist
         if not os.path.exists(ref_audio_path):
             print(f"⚠️ Reference audio file not found")
@@ -292,25 +365,31 @@ class ModelWrapper:
             {"text": text, "ref_audio_path": ref_audio_path, "ref_text": ref_text},
             # Second try: alternative parameter names
             {"text": text, "reference_audio": ref_audio_path, "speaker_text": ref_text},
-            # Third try: just text and audio
+            # Third try: alternative parameter names 2
+            {"text": text, "reference_audio": ref_audio_path, "reference_text": ref_text},
+            # Fourth try: just text and audio
             {"text": text, "reference_audio": ref_audio_path},
-            # Fourth try: just text
+            # Fifth try: just text
             {"text": text},
-            # Fifth try: positional arguments
+            # Sixth try: positional arguments
             {}  # Will use positional below
         ]
 
-        # Try each parameter combination
+        # Try each parameter combination with timeout
         for i, params in enumerate(param_combinations):
             try:
                 method = getattr(self.model, method_name)
                 print(f"Attempt {i+1}: Calling model.{method_name} with {list(params.keys())} parameters")
 
-                # For the positional arguments case
-                if not params:
-                    result = method(text, ref_audio_path, ref_text, **kwargs)
-                else:
-                    result = method(**params, **kwargs)
+                # Set a timeout for inference
+                with torch.inference_mode():
+                    # For the positional arguments case
+                    if not params:
+                        print(f"Using positional args with text='{text}'")
+                        result = method(text, ref_audio_path, ref_text, **kwargs)
+                    else:
+                        print(f"Using keyword args with text='{params.get('text')}'")
+                        result = method(**params, **kwargs)
 
                 print(f"✓ Call succeeded with parameters: {list(params.keys())}")
                 break  # Exit loop if successful
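Note: the signature-guessing list has grown to six combinations. If the trial-and-error loop proves brittle, an alternative (a sketch only; remote-code models whose methods accept **kwargs will bypass the filtering) is to inspect the method once and pass only the kwargs it declares:

    import inspect

    def call_with_supported_kwargs(method, **kwargs):
        # Drop any kwarg the target method does not declare, then call it.
        params = inspect.signature(method).parameters
        if not any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
            kwargs = {k: v for k, v in kwargs.items() if k in params}
        return method(**kwargs)

For example, call_with_supported_kwargs(model.generate, text=text, ref_audio_path=ref_audio_path, ref_text=ref_text) would silently drop ref_audio_path if the model's generate only accepts text.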
@@ -344,7 +423,7 @@
 # Create model wrapper
 model_wrapper = ModelWrapper(model) if model is not None else None
 
-# Streaming TTS class with improved audio quality
+# Streaming TTS class with improved audio quality and error handling
 class StreamingTTS:
     def __init__(self):
         self.is_generating = False
@@ -354,10 +433,15 @@ class StreamingTTS:
         self.output_file = None
         self.all_chunks = []
         self.sample_rate = 24000  # Default sample rate
+        self.current_text = ""  # Track current text being processed
 
         # Create temp directory
-        self.temp_dir = tempfile.mkdtemp()
-        print(f"Created temp directory: {self.temp_dir}")
+        try:
+            self.temp_dir = tempfile.mkdtemp()
+            print(f"Created temp directory: {self.temp_dir}")
+        except Exception as e:
+            print(f"Error creating temp directory: {e}")
+            self.temp_dir = "."  # Use current directory as fallback
 
     def prepare_ref_audio(self, ref_audio, ref_sr):
         """Prepare reference audio with enhanced quality"""
@@ -400,13 +484,17 @@
             print(f"Error cleaning up: {e}")
 
     def generate(self, text, ref_audio, ref_sr, ref_text):
-        """Start generation in a new thread"""
+        """Start generation in a new thread with validation"""
         if self.is_generating:
             print("Already generating speech, please wait")
             return
+
+        # Store the text for verification
+        self.current_text = text
+        print(f"Setting current text to: '{self.current_text}'")
 
         # Check model is loaded
-        if model_wrapper is None:
+        if model_wrapper is None or model is None:
             print("⚠️ Model is not loaded. Cannot generate speech.")
             return
 
@@ -424,9 +512,18 @@
     def _process_streaming(self, text, ref_audio, ref_sr, ref_text):
         """Process text in chunks with high-quality audio generation"""
         try:
+            # Double check text matches what we expect
+            if text != self.current_text:
+                print(f"⚠️ Text mismatch detected! Expected: '{self.current_text}', Got: '{text}'")
+                # Use the stored text to be safe
+                text = self.current_text
+
             # Prepare reference audio
             self.prepare_ref_audio(ref_audio, ref_sr)
 
+            # Print the text we're actually going to process
+            print(f"Processing text: '{text}'")
+
             # Split text into smaller chunks for faster processing
             chunks = split_into_chunks(text)
             print(f"Processing {len(chunks)} chunks")
@@ -441,15 +538,19 @@
                     break
 
                 chunk_start = time.time()
-                print(f"Processing chunk {i+1}/{len(chunks)}: {chunk}")
+                print(f"Processing chunk {i+1}/{len(chunks)}: '{chunk}'")
 
                 # Generate speech for this chunk
                 try:
+                    # Set timeout for inference
+                    chunk_timeout = 30  # 30 seconds timeout per chunk
+
                     with torch.inference_mode():
+                        # Explicitly pass the chunk text
                         chunk_audio = model_wrapper.generate(
-                            chunk,
-                            self.ref_audio_path,
-                            ref_text
+                            text=chunk,  # Make sure we're using the current chunk
+                            ref_audio_path=self.ref_audio_path,
+                            ref_text=ref_text
                         )
 
                     if chunk_audio is None or (hasattr(chunk_audio, 'size') and chunk_audio.size == 0):
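Note: chunk_timeout is assigned in this hunk but never enforced; torch.inference_mode() disables gradient tracking, it does not time anything out. A sketch of one way to actually bound a stuck chunk (assuming the model call is safe to run in a worker thread; the thread itself cannot be killed, so this skips a slow chunk rather than hard-stopping it):

    from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeout

    def generate_with_timeout(generate_fn, timeout_s=30.0, **kwargs):
        # Run generate_fn(**kwargs) in a worker; give up waiting after timeout_s.
        pool = ThreadPoolExecutor(max_workers=1)
        future = pool.submit(generate_fn, **kwargs)
        try:
            return future.result(timeout=timeout_s)
        except FutureTimeout:
            print(f"Chunk generation exceeded {timeout_s}s; skipping")
            return None
        finally:
            pool.shutdown(wait=False)  # do not block on the still-running worker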
@@ -489,7 +590,15 @@
             print(f"Total generation time: {total_time:.2f}s")
 
         except Exception as e:
-            print(f"Error in streaming TTS: {str(e)[:100]}")
+            print(f"Error in streaming TTS: {str(e)[:200]}")
+            # Try to write whatever we have so far
+            if len(self.all_chunks) > 0:
+                try:
+                    combined = np.concatenate(self.all_chunks)
+                    sf.write(self.output_file, combined, 24000, format='WAV', subtype='FLOAT')
+                    print("Saved partial output")
+                except Exception as e2:
+                    print(f"Failed to save partial output: {e2}")
         finally:
             self.is_generating = False
             print("Generation complete")
@@ -511,7 +620,7 @@
 EXAMPLES = [{
     "audio_url": "https://raw.githubusercontent.com/Aparna0112/voicerecording-_TTS/main/KC%20Voice.wav",
     "ref_text": "ഹലോ ഇത് അപരനെ അല്ലേ ഞാൻ ജഗദീപ് ആണ് വിളിക്കുന്നത് ഇപ്പോൾ ഫ്രീയാണോ സംസാരിക്കാമോ ",
-    "synth_text": "ബ്രാഹ്മീയ ലിപികുടുംബത്തിൽ ഉൾപ്പെടുന്ന ഒരു ലിപിയാണ് മലയാള ലിപി."
+    "synth_text": "ഞാൻ മലയാളം സംസാരിക്കാൻ കഴിയുന്നു."
 }]
 
 print("\nPreloading reference audio...")
@@ -530,7 +639,7 @@ def stop_generation():
     streaming_tts.stop()
     return "Generation stopped"
 
-# Gradio interface
+# Gradio interface with offline mode
 with gr.Blocks() as iface:
     gr.Markdown("## 🚀 IndicF5 Malayalam TTS")
 
@@ -574,21 +683,33 @@ with gr.Blocks() as iface:
         if ref_audio is None:
             return None, "⚠️ Reference audio not loaded. Cannot generate speech.", "Error: Reference audio not loaded"
 
+        # Print the text being processed
+        print(f"🔍 User input text: '{text}'")
+
         # Capture stdout for debug purposes
         import io
         from contextlib import redirect_stdout
         f = io.StringIO()
         with redirect_stdout(f):
-            streaming_tts.generate(text, ref_audio, ref_sr, EXAMPLES[0]["ref_text"] if EXAMPLES else "")
+            try:
+                # Make sure the text is explicitly passed as the first parameter
+                streaming_tts.generate(
+                    text=text,  # Explicitly name parameter
+                    ref_audio=ref_audio,
+                    ref_sr=ref_sr,
+                    ref_text=EXAMPLES[0]["ref_text"] if EXAMPLES else ""
+                )
+            except Exception as e:
+                print(f"Error starting generation: {e}")
 
         debug_log = f.getvalue()
 
         # Add a delay to ensure file is created
-        time.sleep(1.5)
+        time.sleep(2.0)
 
         audio_path = streaming_tts.get_current_audio()
         if audio_path and os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
-            return audio_path, "Generation started - audio playing", debug_log
+            return audio_path, f"Generated speech for: {text[:30]}...", debug_log
         else:
             return None, "Starting generation... please wait", debug_log
 
@@ -602,5 +723,7 @@ def exit_handler():
 import atexit
 atexit.register(exit_handler)
 
+# Start the interface with flexible port selection
 print("Starting Gradio interface...")
+# Try a range of ports if 7860 is busy
 iface.launch()