ceymox committed on
Commit
e36284f
·
verified ·
1 Parent(s): 6911c7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -5
app.py CHANGED
@@ -1,11 +1,14 @@
1
  import os
2
  import time
3
  import torch
 
4
  import numpy as np
5
  import soundfile as sf
6
  import tempfile
7
  import uuid
8
  import logging
 
 
9
  from typing import Optional, Dict, Any
10
  from pathlib import Path
11
 
@@ -16,8 +19,55 @@ from fastapi.responses import StreamingResponse
16
  from fastapi.middleware.cors import CORSMiddleware
17
  from pydantic import BaseModel
18
 
19
- # Import ChatterboxTTS
20
- from chatterbox.src.chatterbox.tts import ChatterboxTTS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  # Configure logging
23
  logging.basicConfig(level=logging.INFO)
@@ -56,6 +106,7 @@ def set_seed(seed: int):
56
  if DEVICE == "cuda":
57
  torch.cuda.manual_seed(seed)
58
  torch.cuda.manual_seed_all(seed)
 
59
  np.random.seed(seed)
60
 
61
  def generate_id():
@@ -80,9 +131,15 @@ class TTSResponse(BaseModel):
80
 
81
  # Load model at startup
82
  try:
83
- get_or_load_model()
 
 
 
 
 
84
  except Exception as e:
85
  logger.error(f"Failed to load model on startup: {e}")
 
86
 
87
  @spaces.GPU
88
  def generate_tts_audio(
@@ -435,12 +492,15 @@ def create_gradio_interface():
435
  """)
436
 
437
  # System info
438
- model_status = "✅ Loaded" if MODEL else "❌ Not Loaded"
439
  gr.Markdown(f"""
440
  ### 📊 System Status
441
  - **Model**: {model_status}
442
  - **Device**: {DEVICE}
443
  - **Generated Files**: {len(audio_cache)}
 
 
 
444
  """)
445
 
446
  return demo
@@ -450,9 +510,16 @@ if __name__ == "__main__":
450
  logger.info("🎉 Starting ChatterboxTTS Service...")
451
 
452
  # Model status
453
- model_status = "✅ Loaded" if MODEL else "❌ Not Loaded"
 
 
 
 
 
 
454
  logger.info(f"Model Status: {model_status}")
455
  logger.info(f"Device: {DEVICE}")
 
456
 
457
  if os.getenv("SPACE_ID"):
458
  # Running in Hugging Face Spaces
 
1
  import os
2
  import time
3
  import torch
4
+ import random
5
  import numpy as np
6
  import soundfile as sf
7
  import tempfile
8
  import uuid
9
  import logging
10
+ import requests
11
+ import io
12
  from typing import Optional, Dict, Any
13
  from pathlib import Path
14
 
 
19
  from fastapi.middleware.cors import CORSMiddleware
20
  from pydantic import BaseModel
21
 
22
+ # ChatterboxTTS import - you need to install this separately
23
+ # For now, we'll create a mock implementation that you can replace
24
+ try:
25
+ from chatterbox.src.chatterbox.tts import ChatterboxTTS
26
+ CHATTERBOX_AVAILABLE = True
27
+ except ImportError:
28
+ CHATTERBOX_AVAILABLE = False
29
+ print("⚠️ ChatterboxTTS not found. Using mock implementation.")
30
+ print("📦 Install ChatterboxTTS: pip install chatterbox-tts")
31
+
32
+ # Mock ChatterboxTTS for demonstration
33
+ class ChatterboxTTS:
34
+ def __init__(self, device="cpu"):
35
+ self.device = device
36
+ self.sr = 24000
37
+
38
+ @classmethod
39
+ def from_pretrained(cls, device):
40
+ return cls(device)
41
+
42
+ def to(self, device):
43
+ self.device = device
44
+ return self
45
+
46
+ def generate(self, text, audio_prompt_path=None, exaggeration=0.5,
47
+ temperature=0.8, cfg_weight=0.5):
48
+ # Generate mock audio - replace this with real ChatterboxTTS
49
+ duration = min(len(text) * 0.1, 10.0)
50
+ t = np.linspace(0, duration, int(self.sr * duration))
51
+
52
+ # Create more realistic mock audio
53
+ words = len(text.split())
54
+ freq_base = 150 + (words % 50) * 5 # Vary by content
55
+
56
+ # Generate speech-like waveform
57
+ audio = np.zeros_like(t)
58
+ for i in range(3): # Multiple harmonics
59
+ freq = freq_base * (i + 1)
60
+ envelope = np.exp(-t / (duration * 0.7))
61
+ wave = 0.2 * np.sin(2 * np.pi * freq * t + i) * envelope
62
+ audio += wave
63
+
64
+ # Add some variation based on parameters
65
+ audio *= (0.5 + exaggeration)
66
+ if temperature > 1.0:
67
+ noise = np.random.normal(0, 0.05, len(audio))
68
+ audio += noise
69
+
70
+ return torch.tensor(audio).unsqueeze(0)
71
 
72
  # Configure logging
73
  logging.basicConfig(level=logging.INFO)
 
106
  if DEVICE == "cuda":
107
  torch.cuda.manual_seed(seed)
108
  torch.cuda.manual_seed_all(seed)
109
+ random.seed(seed)
110
  np.random.seed(seed)
111
 
112
  def generate_id():
 
131
 
132
  # Load model at startup
133
  try:
134
+ if CHATTERBOX_AVAILABLE:
135
+ get_or_load_model()
136
+ print("✅ ChatterboxTTS model loaded successfully")
137
+ else:
138
+ MODEL = ChatterboxTTS.from_pretrained(DEVICE)
139
+ print("⚠️ Using mock ChatterboxTTS implementation")
140
  except Exception as e:
141
  logger.error(f"Failed to load model on startup: {e}")
142
+ MODEL = None
143
 
144
  @spaces.GPU
145
  def generate_tts_audio(
 
492
  """)
493
 
494
  # System info
495
+ model_status = "✅ Real ChatterboxTTS" if CHATTERBOX_AVAILABLE and MODEL else "⚠️ Mock Implementation" if MODEL else "❌ Not Loaded"
496
  gr.Markdown(f"""
497
  ### 📊 System Status
498
  - **Model**: {model_status}
499
  - **Device**: {DEVICE}
500
  - **Generated Files**: {len(audio_cache)}
501
+ - **ChatterboxTTS Available**: {CHATTERBOX_AVAILABLE}
502
+
503
+ {"" if CHATTERBOX_AVAILABLE else "**Note**: Install ChatterboxTTS for production use: `pip install chatterbox-tts`"}
504
  """)
505
 
506
  return demo
 
510
  logger.info("🎉 Starting ChatterboxTTS Service...")
511
 
512
  # Model status
513
+ if CHATTERBOX_AVAILABLE and MODEL:
514
+ model_status = "✅ Real ChatterboxTTS Loaded"
515
+ elif MODEL:
516
+ model_status = "⚠️ Mock ChatterboxTTS (Install real package for production)"
517
+ else:
518
+ model_status = "❌ No Model Loaded"
519
+
520
  logger.info(f"Model Status: {model_status}")
521
  logger.info(f"Device: {DEVICE}")
522
+ logger.info(f"ChatterboxTTS Available: {CHATTERBOX_AVAILABLE}")
523
 
524
  if os.getenv("SPACE_ID"):
525
  # Running in Hugging Face Spaces