ceymox committed on
Commit
4ac6a9c
·
verified ·
1 Parent(s): 7e57840

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -42
app.py CHANGED
@@ -135,6 +135,30 @@ def load_chatterbox_model():
135
 
136
  return False
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  def get_or_load_model():
139
  """Load ChatterboxTTS model if not already loaded"""
140
  global MODEL
@@ -259,10 +283,29 @@ def generate_tts_audio(
259
  logger.warning("🚨 USING FALLBACK - Real ChatterboxTTS not found!")
260
  logger.warning("📋 To fix: Upload your ChatterboxTTS package to this Space")
261
 
 
 
 
 
262
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  wav = current_model.generate(
264
  text_input[:300], # Limit text length
265
- audio_prompt_path=audio_prompt_path_input,
266
  exaggeration=exaggeration_input,
267
  temperature=temperature_input,
268
  cfg_weight=cfgw_input,
@@ -278,6 +321,14 @@ def generate_tts_audio(
278
  except Exception as e:
279
  logger.error(f"❌ Audio generation failed: {e}")
280
  raise
 
 
 
 
 
 
 
 
281
 
282
  # FastAPI app for API endpoints
283
  app = FastAPI(
@@ -341,47 +392,68 @@ async def synthesize_speech(request: TTSRequest):
341
 
342
  start_time = time.time()
343
 
344
- # Generate audio
345
- sample_rate, audio_data = generate_tts_audio(
346
- request.text,
347
- request.audio_prompt_url,
348
- request.exaggeration,
349
- request.temperature,
350
- request.seed,
351
- request.cfg_weight
352
- )
353
 
354
- generation_time = time.time() - start_time
355
-
356
- # Save audio file
357
- audio_id = generate_id()
358
- audio_path = os.path.join(AUDIO_DIR, f"{audio_id}.wav")
359
- sf.write(audio_path, audio_data, sample_rate)
360
-
361
- # Cache audio info
362
- audio_cache[audio_id] = {
363
- "path": audio_path,
364
- "text": request.text,
365
- "sample_rate": sample_rate,
366
- "duration": len(audio_data) / sample_rate,
367
- "generated_at": time.time(),
368
- "generation_time": generation_time,
369
- "real_chatterbox": CHATTERBOX_AVAILABLE
370
- }
371
 
372
- message = "Speech synthesized successfully"
373
- if not CHATTERBOX_AVAILABLE:
374
- message += " (using fallback - upload ChatterboxTTS for real synthesis)"
375
-
376
- logger.info(f"✅ Audio saved: {audio_id} ({generation_time:.2f}s)")
377
-
378
- return TTSResponse(
379
- success=True,
380
- audio_id=audio_id,
381
- message=message,
382
- sample_rate=sample_rate,
383
- duration=len(audio_data) / sample_rate
384
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
  except HTTPException:
387
  raise
@@ -501,8 +573,9 @@ def create_gradio_interface():
501
 
502
  audio_prompt = gr.Textbox(
503
  value="https://storage.googleapis.com/chatterbox-demo-samples/prompts/female_shadowheart4.flac",
504
- label="Reference Audio URL",
505
- placeholder="URL to reference audio file"
 
506
  )
507
 
508
  with gr.Row():
 
135
 
136
  return False
137
 
138
+ def download_audio_from_url(url):
139
+ """Download audio from URL and save to temporary file"""
140
+ try:
141
+ logger.info(f"📥 Downloading reference audio from: {url}")
142
+ response = requests.get(url, timeout=30, headers={
143
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
144
+ })
145
+
146
+ if response.status_code == 200:
147
+ # Create temporary file
148
+ temp_file = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
149
+ temp_file.write(response.content)
150
+ temp_file.close()
151
+
152
+ logger.info(f"✅ Audio downloaded to: {temp_file.name}")
153
+ return temp_file.name
154
+ else:
155
+ logger.error(f"❌ HTTP {response.status_code} when downloading audio")
156
+ return None
157
+
158
+ except Exception as e:
159
+ logger.error(f"❌ Error downloading audio from URL: {e}")
160
+ return None
161
+
162
  def get_or_load_model():
163
  """Load ChatterboxTTS model if not already loaded"""
164
  global MODEL
 
283
  logger.warning("🚨 USING FALLBACK - Real ChatterboxTTS not found!")
284
  logger.warning("📋 To fix: Upload your ChatterboxTTS package to this Space")
285
 
286
+ # Handle audio prompt - download if it's a URL
287
+ audio_prompt_path = audio_prompt_path_input
288
+ temp_audio_file = None
289
+
290
  try:
291
+ # Check if it's a URL
292
+ if audio_prompt_path_input and (audio_prompt_path_input.startswith('http://') or audio_prompt_path_input.startswith('https://')):
293
+ logger.info(f"🌐 Detected URL, downloading audio: {audio_prompt_path_input}")
294
+ temp_audio_file = download_audio_from_url(audio_prompt_path_input)
295
+ if temp_audio_file:
296
+ audio_prompt_path = temp_audio_file
297
+ logger.info(f"✅ Using downloaded audio: {audio_prompt_path}")
298
+ else:
299
+ logger.warning("⚠️ Failed to download audio, proceeding without reference")
300
+ audio_prompt_path = None
301
+ elif audio_prompt_path_input and not os.path.exists(audio_prompt_path_input):
302
+ logger.warning(f"⚠️ Audio file not found: {audio_prompt_path_input}, proceeding without reference")
303
+ audio_prompt_path = None
304
+
305
+ # Generate audio
306
  wav = current_model.generate(
307
  text_input[:300], # Limit text length
308
+ audio_prompt_path=audio_prompt_path,
309
  exaggeration=exaggeration_input,
310
  temperature=temperature_input,
311
  cfg_weight=cfgw_input,
 
321
  except Exception as e:
322
  logger.error(f"❌ Audio generation failed: {e}")
323
  raise
324
+ finally:
325
+ # Clean up temporary file
326
+ if temp_audio_file and os.path.exists(temp_audio_file):
327
+ try:
328
+ os.unlink(temp_audio_file)
329
+ logger.info(f"🗑️ Cleaned up temporary file: {temp_audio_file}")
330
+ except:
331
+ pass
332
 
333
  # FastAPI app for API endpoints
334
  app = FastAPI(
 
392
 
393
  start_time = time.time()
394
 
395
+ # Handle audio prompt URL
396
+ audio_prompt_path = request.audio_prompt_url
397
+ temp_audio_file = None
 
 
 
 
 
 
398
 
399
+ if request.audio_prompt_url and (request.audio_prompt_url.startswith('http://') or request.audio_prompt_url.startswith('https://')):
400
+ temp_audio_file = download_audio_from_url(request.audio_prompt_url)
401
+ if temp_audio_file:
402
+ audio_prompt_path = temp_audio_file
403
+ else:
404
+ logger.warning("Failed to download reference audio, proceeding without")
405
+ audio_prompt_path = None
 
 
 
 
 
 
 
 
 
 
406
 
407
+ try:
408
+ # Generate audio
409
+ sample_rate, audio_data = generate_tts_audio(
410
+ request.text,
411
+ audio_prompt_path,
412
+ request.exaggeration,
413
+ request.temperature,
414
+ request.seed,
415
+ request.cfg_weight
416
+ )
417
+
418
+ generation_time = time.time() - start_time
419
+
420
+ # Save audio file
421
+ audio_id = generate_id()
422
+ audio_path = os.path.join(AUDIO_DIR, f"{audio_id}.wav")
423
+ sf.write(audio_path, audio_data, sample_rate)
424
+
425
+ # Cache audio info
426
+ audio_cache[audio_id] = {
427
+ "path": audio_path,
428
+ "text": request.text,
429
+ "sample_rate": sample_rate,
430
+ "duration": len(audio_data) / sample_rate,
431
+ "generated_at": time.time(),
432
+ "generation_time": generation_time,
433
+ "real_chatterbox": CHATTERBOX_AVAILABLE
434
+ }
435
+
436
+ message = "Speech synthesized successfully"
437
+ if not CHATTERBOX_AVAILABLE:
438
+ message += " (using fallback - upload ChatterboxTTS for real synthesis)"
439
+
440
+ logger.info(f"✅ Audio saved: {audio_id} ({generation_time:.2f}s)")
441
+
442
+ return TTSResponse(
443
+ success=True,
444
+ audio_id=audio_id,
445
+ message=message,
446
+ sample_rate=sample_rate,
447
+ duration=len(audio_data) / sample_rate
448
+ )
449
+
450
+ finally:
451
+ # Clean up temporary audio file
452
+ if temp_audio_file and os.path.exists(temp_audio_file):
453
+ try:
454
+ os.unlink(temp_audio_file)
455
+ except:
456
+ pass
457
 
458
  except HTTPException:
459
  raise
 
573
 
574
  audio_prompt = gr.Textbox(
575
  value="https://storage.googleapis.com/chatterbox-demo-samples/prompts/female_shadowheart4.flac",
576
+ label="Reference Audio URL or File Path",
577
+ placeholder="https://example.com/audio.wav or /path/to/local/file.wav",
578
+ info="URL will be downloaded automatically, or use local file path"
579
  )
580
 
581
  with gr.Row():