Spaces:

ceymox
/

Chatterbox_AP

Sleeping

App Files Files Community

ceymox committed on Jun 11

Commit

e36284f

verified ·

1 Parent(s): 6911c7b

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -5

app.py CHANGED Viewed

@@ -1,11 +1,14 @@
 import os
 import time
 import torch
 import numpy as np
 import soundfile as sf
 import tempfile
 import uuid
 import logging
 from typing import Optional, Dict, Any
 from pathlib import Path
@@ -16,8 +19,55 @@ from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-# Import ChatterboxTTS
-from chatterbox.src.chatterbox.tts import ChatterboxTTS
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -56,6 +106,7 @@ def set_seed(seed: int):
     if DEVICE == "cuda":
         torch.cuda.manual_seed(seed)
         torch.cuda.manual_seed_all(seed)
     np.random.seed(seed)
 def generate_id():
@@ -80,9 +131,15 @@ class TTSResponse(BaseModel):
 # Load model at startup
 try:
-    get_or_load_model()
 except Exception as e:
     logger.error(f"Failed to load model on startup: {e}")
 @spaces.GPU
 def generate_tts_audio(
@@ -435,12 +492,15 @@ def create_gradio_interface():
         """)
         # System info
-        model_status = "✅ Loaded" if MODEL else "❌ Not Loaded"
         gr.Markdown(f"""
         ### 📊 System Status
         - **Model**: {model_status}
         - **Device**: {DEVICE}
         - **Generated Files**: {len(audio_cache)}
         """)
     return demo
@@ -450,9 +510,16 @@ if __name__ == "__main__":
     logger.info("🎉 Starting ChatterboxTTS Service...")
     # Model status
-    model_status = "✅ Loaded" if MODEL else "❌ Not Loaded"
     logger.info(f"Model Status: {model_status}")
     logger.info(f"Device: {DEVICE}")
     if os.getenv("SPACE_ID"):
         # Running in Hugging Face Spaces

 import os
 import time
 import torch
+import random
 import numpy as np
 import soundfile as sf
 import tempfile
 import uuid
 import logging
+import requests
+import io
 from typing import Optional, Dict, Any
 from pathlib import Path
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
# ChatterboxTTS is an optional dependency that has to be installed separately.
# If the import fails, a mock class with the same name is defined instead so
# the module can still be imported and exercised end to end.
try:
    from chatterbox.src.chatterbox.tts import ChatterboxTTS
    CHATTERBOX_AVAILABLE = True
except ImportError:
    CHATTERBOX_AVAILABLE = False
    print("⚠️ ChatterboxTTS not found. Using mock implementation.")
    print("📦 Install ChatterboxTTS: pip install chatterbox-tts")

    class ChatterboxTTS:
        """Mock used when the real ChatterboxTTS package cannot be imported.

        It defines ``sr``, ``from_pretrained``, ``to`` and ``generate`` and
        produces a synthetic decaying tone instead of speech. Replace it with
        the real package for actual text-to-speech output.
        """

        def __init__(self, device="cpu"):
            """Store the target device and the sample rate of the mock audio."""
            self.device = device
            self.sr = 24000  # samples per second of the generated waveform

        @classmethod
        def from_pretrained(cls, device):
            """Return a new mock instance bound to ``device``; nothing is loaded."""
            return cls(device)

        def to(self, device):
            """Record ``device`` and return ``self`` so calls can be chained."""
            self.device = device
            return self

        def generate(self, text, audio_prompt_path=None, exaggeration=0.5,
                     temperature=0.8, cfg_weight=0.5):
            """Synthesize a placeholder waveform whose length follows ``text``.

            Args:
                text: Input string. Each character adds 0.1 s of audio, capped
                    at 10 s; the word count selects the base frequency.
                audio_prompt_path: Accepted for signature compatibility; unused.
                exaggeration: Amplitude is scaled by ``0.5 + exaggeration``.
                temperature: Values above 1.0 add Gaussian noise (sigma 0.05)
                    drawn from NumPy's global random state.
                cfg_weight: Accepted for signature compatibility; unused.

            Returns:
                A float32 ``torch.Tensor`` of shape ``(1, num_samples)`` with
                every sample limited to the range [-1.0, 1.0]. Empty ``text``
                yields a tensor of shape ``(1, 0)``.
            """
            duration = min(len(text) * 0.1, 10.0)
            num_samples = int(self.sr * duration)
            if num_samples == 0:
                # Empty text: return early rather than dividing by a zero
                # duration when building the envelope below.
                return torch.zeros((1, 0), dtype=torch.float32)

            t = np.linspace(0, duration, num_samples)
            # Vary the pitch with the amount of content.
            freq_base = 150 + (len(text.split()) % 50) * 5
            # The decay envelope is the same for every harmonic, so build it once.
            envelope = np.exp(-t / (duration * 0.7))

            audio = np.zeros_like(t)
            for harmonic in range(3):
                freq = freq_base * (harmonic + 1)
                audio += 0.2 * np.sin(2 * np.pi * freq * t + harmonic) * envelope

            audio *= (0.5 + exaggeration)
            if temperature > 1.0:
                audio += np.random.normal(0, 0.05, len(audio))

            # High exaggeration or added noise can push samples past full
            # scale; keep the waveform inside the normalized audio range.
            np.clip(audio, -1.0, 1.0, out=audio)
            # NumPy works in float64 here; emit torch's default float32 instead.
            return torch.tensor(audio, dtype=torch.float32).unsqueeze(0)
 # Configure logging
 logging.basicConfig(level=logging.INFO)
     if DEVICE == "cuda":
         torch.cuda.manual_seed(seed)
         torch.cuda.manual_seed_all(seed)
+    random.seed(seed)
     np.random.seed(seed)
 def generate_id():
 # Load model at startup
 try:
+    if CHATTERBOX_AVAILABLE:
+        get_or_load_model()
+        print("✅ ChatterboxTTS model loaded successfully")
+    else:
+        MODEL = ChatterboxTTS.from_pretrained(DEVICE)
+        print("⚠️ Using mock ChatterboxTTS implementation")
 except Exception as e:
     logger.error(f"Failed to load model on startup: {e}")
+    MODEL = None
 @spaces.GPU
 def generate_tts_audio(
         """)
         # System info
+        model_status = "✅ Real ChatterboxTTS" if CHATTERBOX_AVAILABLE and MODEL else "⚠️ Mock Implementation" if MODEL else "❌ Not Loaded"
         gr.Markdown(f"""
         ### 📊 System Status
         - **Model**: {model_status}
         - **Device**: {DEVICE}
         - **Generated Files**: {len(audio_cache)}
+        - **ChatterboxTTS Available**: {CHATTERBOX_AVAILABLE}
+        {"" if CHATTERBOX_AVAILABLE else "**Note**: Install ChatterboxTTS for production use: `pip install chatterbox-tts`"}
         """)
     return demo
     logger.info("🎉 Starting ChatterboxTTS Service...")
     # Model status
+    if CHATTERBOX_AVAILABLE and MODEL:
+        model_status = "✅ Real ChatterboxTTS Loaded"
+    elif MODEL:
+        model_status = "⚠️ Mock ChatterboxTTS (Install real package for production)"
+    else:
+        model_status = "❌ No Model Loaded"
     logger.info(f"Model Status: {model_status}")
     logger.info(f"Device: {DEVICE}")
+    logger.info(f"ChatterboxTTS Available: {CHATTERBOX_AVAILABLE}")
     if os.getenv("SPACE_ID"):
         # Running in Hugging Face Spaces