Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,14 @@
|
|
1 |
import os
|
2 |
import time
|
3 |
import torch
|
|
|
4 |
import numpy as np
|
5 |
import soundfile as sf
|
6 |
import tempfile
|
7 |
import uuid
|
8 |
import logging
|
|
|
|
|
9 |
from typing import Optional, Dict, Any
|
10 |
from pathlib import Path
|
11 |
|
@@ -16,8 +19,55 @@ from fastapi.responses import StreamingResponse
|
|
16 |
from fastapi.middleware.cors import CORSMiddleware
|
17 |
from pydantic import BaseModel
|
18 |
|
19 |
-
#
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
# Configure logging
|
23 |
logging.basicConfig(level=logging.INFO)
|
@@ -56,6 +106,7 @@ def set_seed(seed: int):
|
|
56 |
if DEVICE == "cuda":
|
57 |
torch.cuda.manual_seed(seed)
|
58 |
torch.cuda.manual_seed_all(seed)
|
|
|
59 |
np.random.seed(seed)
|
60 |
|
61 |
def generate_id():
|
@@ -80,9 +131,15 @@ class TTSResponse(BaseModel):
|
|
80 |
|
81 |
# Load model at startup
|
82 |
try:
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
84 |
except Exception as e:
|
85 |
logger.error(f"Failed to load model on startup: {e}")
|
|
|
86 |
|
87 |
@spaces.GPU
|
88 |
def generate_tts_audio(
|
@@ -435,12 +492,15 @@ def create_gradio_interface():
|
|
435 |
""")
|
436 |
|
437 |
# System info
|
438 |
-
model_status = "β
|
439 |
gr.Markdown(f"""
|
440 |
### π System Status
|
441 |
- **Model**: {model_status}
|
442 |
- **Device**: {DEVICE}
|
443 |
- **Generated Files**: {len(audio_cache)}
|
|
|
|
|
|
|
444 |
""")
|
445 |
|
446 |
return demo
|
@@ -450,9 +510,16 @@ if __name__ == "__main__":
|
|
450 |
logger.info("🚀 Starting ChatterboxTTS Service...")
|
451 |
|
452 |
# Model status
|
453 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
454 |
logger.info(f"Model Status: {model_status}")
|
455 |
logger.info(f"Device: {DEVICE}")
|
|
|
456 |
|
457 |
if os.getenv("SPACE_ID"):
|
458 |
# Running in Hugging Face Spaces
|
|
|
1 |
import os
|
2 |
import time
|
3 |
import torch
|
4 |
+
import random
|
5 |
import numpy as np
|
6 |
import soundfile as sf
|
7 |
import tempfile
|
8 |
import uuid
|
9 |
import logging
|
10 |
+
import requests
|
11 |
+
import io
|
12 |
from typing import Optional, Dict, Any
|
13 |
from pathlib import Path
|
14 |
|
|
|
19 |
from fastapi.middleware.cors import CORSMiddleware
|
20 |
from pydantic import BaseModel
|
21 |
|
22 |
+
# ChatterboxTTS import - you need to install this separately
|
23 |
+
# For now, we'll create a mock implementation that you can replace
|
24 |
+
# Resolve the ChatterboxTTS class.
#
# Two import layouts are tried, in this order:
#   1. ``chatterbox.src.chatterbox.tts`` - the path this file originally used
#      (matches a checkout of the upstream repository placed in ./chatterbox).
#   2. ``chatterbox.tts`` - the path exposed by the ``chatterbox-tts`` pip
#      package, which is what the install hint printed below tells users to
#      install. Without this fallback, following the hint would still leave the
#      service on the mock.
#
# When neither import works, CHATTERBOX_AVAILABLE is set to False and a small
# stand-in class with the same name is defined so the rest of the module can
# keep calling ``ChatterboxTTS.from_pretrained(...)`` / ``.generate(...)``.
try:
    from chatterbox.src.chatterbox.tts import ChatterboxTTS
    CHATTERBOX_AVAILABLE = True
except ImportError:
    try:
        from chatterbox.tts import ChatterboxTTS
        CHATTERBOX_AVAILABLE = True
    except ImportError:
        CHATTERBOX_AVAILABLE = False
        print("⚠️ ChatterboxTTS not found. Using mock implementation.")
        print("📦 Install ChatterboxTTS: pip install chatterbox-tts")

        # Mock ChatterboxTTS for demonstration
        class ChatterboxTTS:
            """Stand-in for the real model that synthesises a decaying tone.

            Only the members this mock defines are provided: ``device``,
            ``sr``, ``from_pretrained``, ``to`` and ``generate``.
            """

            def __init__(self, device="cpu"):
                """Remember the target device and fix the sample rate at 24 kHz."""
                self.device = device
                self.sr = 24000

            @classmethod
            def from_pretrained(cls, device):
                """Return a new mock instance bound to *device* (nothing is loaded)."""
                return cls(device)

            def to(self, device):
                """Record *device* and return ``self`` so calls can be chained."""
                self.device = device
                return self

            def generate(self, text, audio_prompt_path=None, exaggeration=0.5,
                         temperature=0.8, cfg_weight=0.5):
                """Return placeholder audio for *text* as a ``(1, n_samples)`` tensor.

                Args:
                    text: Input string; its length sets the duration
                        (0.1 s per character, capped at 10 s) and its word
                        count shifts the base frequency.
                    audio_prompt_path: Accepted for signature parity; unused.
                    exaggeration: Amplitude is scaled by ``0.5 + exaggeration``.
                    temperature: Values above 1.0 add Gaussian noise
                        (standard deviation 0.05) drawn from ``np.random``.
                    cfg_weight: Accepted for signature parity; unused.

                Returns:
                    ``torch.Tensor`` of shape ``(1, int(sr * duration))``.
                    Empty *text* yields shape ``(1, 0)``.
                """
                # Generate mock audio - replace this with real ChatterboxTTS
                duration = min(len(text) * 0.1, 10.0)
                t = np.linspace(0, duration, int(self.sr * duration))

                # Vary the pitch by content so different inputs sound different.
                words = len(text.split())
                freq_base = 150 + (words % 50) * 5

                # The decay envelope is the same for every harmonic, so it is
                # computed once. For empty text ``t`` has no elements, so the
                # zero divisor below never touches a value.
                envelope = np.exp(-t / (duration * 0.7))

                # Sum three harmonics, each phase-shifted by its index.
                audio = np.zeros_like(t)
                for i in range(3):
                    freq = freq_base * (i + 1)
                    audio += 0.2 * np.sin(2 * np.pi * freq * t + i) * envelope

                # Add some variation based on parameters
                audio *= (0.5 + exaggeration)
                if temperature > 1.0:
                    audio += np.random.normal(0, 0.05, len(audio))

                return torch.tensor(audio).unsqueeze(0)
|
71 |
|
72 |
# Configure logging
|
73 |
logging.basicConfig(level=logging.INFO)
|
|
|
106 |
if DEVICE == "cuda":
|
107 |
torch.cuda.manual_seed(seed)
|
108 |
torch.cuda.manual_seed_all(seed)
|
109 |
+
random.seed(seed)
|
110 |
np.random.seed(seed)
|
111 |
|
112 |
def generate_id():
|
|
|
131 |
|
132 |
# Load model at startup
|
133 |
# Populate the model once at import time so the first request does not pay
# for loading. Any failure is logged and MODEL is set to None rather than
# aborting the import; the status displays elsewhere in this file branch on
# MODEL being falsy.
try:
    if CHATTERBOX_AVAILABLE:
        # NOTE(review): presumably get_or_load_model() assigns the module-level
        # MODEL itself - this branch does not assign it. Confirm in its body.
        get_or_load_model()
        print("✅ ChatterboxTTS model loaded successfully")
    else:
        # The real package is missing: instantiate the mock class instead.
        MODEL = ChatterboxTTS.from_pretrained(DEVICE)
        print("⚠️ Using mock ChatterboxTTS implementation")
except Exception as e:
    logger.error(f"Failed to load model on startup: {e}")
    MODEL = None
|
143 |
|
144 |
@spaces.GPU
|
145 |
def generate_tts_audio(
|
|
|
492 |
""")
|
493 |
|
494 |
# System info
|
495 |
+
model_status = "✅ Real ChatterboxTTS" if CHATTERBOX_AVAILABLE and MODEL else "⚠️ Mock Implementation" if MODEL else "❌ Not Loaded"
|
496 |
gr.Markdown(f"""
|
497 |
### π System Status
|
498 |
- **Model**: {model_status}
|
499 |
- **Device**: {DEVICE}
|
500 |
- **Generated Files**: {len(audio_cache)}
|
501 |
+
- **ChatterboxTTS Available**: {CHATTERBOX_AVAILABLE}
|
502 |
+
|
503 |
+
{"" if CHATTERBOX_AVAILABLE else "**Note**: Install ChatterboxTTS for production use: `pip install chatterbox-tts`"}
|
504 |
""")
|
505 |
|
506 |
return demo
|
|
|
510 |
logger.info("🚀 Starting ChatterboxTTS Service...")
|
511 |
|
512 |
# Model status
|
513 |
+
if CHATTERBOX_AVAILABLE and MODEL:
|
514 |
+
model_status = "✅ Real ChatterboxTTS Loaded"
|
515 |
+
elif MODEL:
|
516 |
+
model_status = "⚠️ Mock ChatterboxTTS (Install real package for production)"
|
517 |
+
else:
|
518 |
+
model_status = "❌ No Model Loaded"
|
519 |
+
|
520 |
logger.info(f"Model Status: {model_status}")
|
521 |
logger.info(f"Device: {DEVICE}")
|
522 |
+
logger.info(f"ChatterboxTTS Available: {CHATTERBOX_AVAILABLE}")
|
523 |
|
524 |
if os.getenv("SPACE_ID"):
|
525 |
# Running in Hugging Face Spaces
|