Spaces:

ceymox
/

Chatterbox_AP

Sleeping

App Files Files Community

ceymox committed on Jun 11

Commit

81fa7f6

verified ·

1 Parent(s): 39aace3

Update app.py

Browse files

Files changed (1) hide show

app.py +131 -36

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import logging
 import requests
 import io
 import json
 from typing import Optional, Dict, Any, List
 from pathlib import Path
@@ -32,9 +33,18 @@ logger.info(f"🚀 Running on device: {DEVICE}")
 MODEL = None
 CHATTERBOX_AVAILABLE = False
-# Storage directories
-AUDIO_DIR = "generated_audio"
-VOICES_DIR = "custom_voices"
 os.makedirs(AUDIO_DIR, exist_ok=True)
 os.makedirs(VOICES_DIR, exist_ok=True)
@@ -62,67 +72,133 @@ BUILTIN_VOICES = {
     }
 }
 def load_voice_library():
-    """Load saved custom voices from disk"""
     global voice_library
     voice_library = BUILTIN_VOICES.copy()
     voices_json_path = os.path.join(VOICES_DIR, "voices.json")
-    if os.path.exists(voices_json_path):
-        try:
             with open(voices_json_path, 'r', encoding='utf-8') as f:
                 custom_voices = json.load(f)
                 voice_library.update(custom_voices)
-            logger.info(f"✅ Loaded {len(custom_voices)} custom voices from disk")
-        except Exception as e:
-            logger.error(f"❌ Error loading voice library: {e}")
 def save_voice_library():
-    """Save custom voices to disk"""
     try:
         # Only save custom voices (not builtin)
         custom_voices = {k: v for k, v in voice_library.items() if v.get("type") != "builtin"}
         voices_json_path = os.path.join(VOICES_DIR, "voices.json")
         with open(voices_json_path, 'w', encoding='utf-8') as f:
             json.dump(custom_voices, f, ensure_ascii=False, indent=2)
-        logger.info(f"✅ Saved {len(custom_voices)} custom voices to disk")
     except Exception as e:
         logger.error(f"❌ Error saving voice library: {e}")
 def create_voice_from_audio(audio_file, voice_name, voice_description="Custom voice"):
-    """Create a new voice from uploaded audio"""
     try:
         voice_id = f"voice_{int(time.time())}_{uuid.uuid4().hex[:8]}"
-        # Save audio file
-        audio_filename = f"{voice_id}.wav"
-        audio_path = os.path.join(VOICES_DIR, audio_filename)
-        # Convert and save audio
         if isinstance(audio_file, tuple):
             # Gradio audio format (sample_rate, audio_data)
             sample_rate, audio_data = audio_file
-            sf.write(audio_path, audio_data, sample_rate)
         else:
-            # File upload
-            sf.write(audio_path, audio_file, 22050)  # Default sample rate
-        # Create voice entry
         voice_entry = {
             "voice_id": voice_id,
             "name": voice_name,
             "description": voice_description,
-            "audio_path": audio_path,
             "type": "custom",
-            "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ")
         }
         # Add to voice library
         voice_library[voice_id] = voice_entry
         save_voice_library()
-        logger.info(f"✅ Created voice: {voice_name} ({voice_id})")
         return voice_id, voice_entry
     except Exception as e:
@@ -154,14 +230,25 @@ def download_audio_from_url(url):
         return None
 def get_voice_audio_path(voice_id):
-    """Get the audio path for a voice (download if URL, return path if local)"""
     if voice_id not in voice_library:
         return None
     voice_info = voice_library[voice_id]
-    # If it's a custom voice with local file
-    if voice_info.get("type") == "custom" and "audio_path" in voice_info:
         audio_path = voice_info["audio_path"]
         if os.path.exists(audio_path):
             return audio_path
@@ -370,8 +457,12 @@ def generate_tts_audio(
     temp_audio_file = None
     try:
-        if audio_prompt_path and audio_prompt_path.startswith('/tmp/'):
-            # It's a temporary file from URL download
             temp_audio_file = audio_prompt_path
         if audio_prompt_path:
@@ -400,8 +491,8 @@ def generate_tts_audio(
         logger.error(f"❌ Audio generation failed: {e}")
         raise
     finally:
-        # Clean up temporary file (only if it's a downloaded URL)
-        if temp_audio_file and temp_audio_file.startswith('/tmp/') and os.path.exists(temp_audio_file):
             try:
                 os.unlink(temp_audio_file)
                 logger.info(f"🗑️ Cleaned up temporary file: {temp_audio_file}")
@@ -529,7 +620,7 @@ async def delete_voice(voice_id: str):
         raise HTTPException(status_code=400, detail="Cannot delete builtin voices")
     try:
-        # Delete audio file
         if "audio_path" in voice_info and os.path.exists(voice_info["audio_path"]):
             os.unlink(voice_info["audio_path"])
@@ -538,6 +629,8 @@ async def delete_voice(voice_id: str):
         del voice_library[voice_id]
         save_voice_library()
         return {
             "success": True,
             "message": f"Voice '{voice_name}' deleted successfully"
@@ -780,7 +873,7 @@ def create_gradio_interface():
             voice_name = voice_info["name"]
-            # Delete audio file
             if "audio_path" in voice_info and os.path.exists(voice_info["audio_path"]):
                 os.unlink(voice_info["audio_path"])
@@ -789,6 +882,8 @@ def create_gradio_interface():
             save_voice_library()
             updated_choices = get_voice_choices()
             return (
                 f"✅ Voice '{voice_name}' deleted successfully",
                 gr.update(choices=updated_choices, value=updated_choices[0][1] if updated_choices else None)
@@ -1035,14 +1130,14 @@ def create_gradio_interface():
         - **Device**: {DEVICE}
         - **ChatterboxTTS**: {chatterbox_status}
         - **Voice Library**: {len(voice_library)} voices loaded
         - **Generated Files**: {len(audio_cache)}
-        - **Storage**: `{VOICES_DIR}/` for voices, `{AUDIO_DIR}/` for output
         {'''### 🎉 Production Ready!
-        Your ChatterboxTTS model is loaded with voice management system.''' if CHATTERBOX_AVAILABLE else '''### ⚠️ Action Required
         **You're hearing beep sounds because ChatterboxTTS isn't loaded.**
-        Voice management is working, but you need ChatterboxTTS for real synthesis.'''}
         """)
     return demo

 import requests
 import io
 import json
+import base64
 from typing import Optional, Dict, Any, List
 from pathlib import Path
 MODEL = None
 CHATTERBOX_AVAILABLE = False
+# Storage directories - use persistent storage if available
+if os.path.exists("/data"):
+    # Hugging Face Spaces persistent storage
+    VOICES_DIR = "/data/custom_voices"
+    AUDIO_DIR = "/data/generated_audio"
+    logger.info("✅ Using Hugging Face Spaces persistent storage (/data)")
+else:
+    # Fallback to local storage
+    VOICES_DIR = "custom_voices"
+    AUDIO_DIR = "generated_audio"
+    logger.warning("⚠️ Using local storage (voices will not persist)")
 os.makedirs(AUDIO_DIR, exist_ok=True)
 os.makedirs(VOICES_DIR, exist_ok=True)
     }
 }
+def encode_audio_to_base64(audio_data, sample_rate):
+    """Encode audio samples as a base64 string holding a WAV file.
+
+    The samples are written to a temporary ``.wav`` file with ``sf.write``,
+    the file's bytes are read back, and those bytes are base64-encoded.
+
+    Args:
+        audio_data: Audio samples, passed unchanged to ``sf.write``.
+        sample_rate: Sample rate, passed unchanged to ``sf.write``.
+
+    Returns:
+        The base64 text as ``str``, or ``None`` if writing, reading or
+        encoding fails (the error is logged, not raised).
+    """
+    temp_path = None
+    try:
+        # Only the path is needed: close our own handle immediately so no
+        # descriptor is leaked and sf.write can reopen the file by name.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            temp_path = temp_file.name
+        sf.write(temp_path, audio_data, sample_rate)
+        # Read as bytes and encode
+        with open(temp_path, 'rb') as f:
+            audio_bytes = f.read()
+        return base64.b64encode(audio_bytes).decode('utf-8')
+    except Exception as e:
+        logger.error(f"Error encoding audio: {e}")
+        return None
+    finally:
+        # Runs on success and on failure, so a failed write or read no
+        # longer leaves an orphaned temp file behind.
+        if temp_path is not None:
+            try:
+                os.unlink(temp_path)
+            except OSError:
+                # Best-effort cleanup; the encoding result is still valid.
+                pass
+def decode_audio_from_base64(base64_string):
+    """Decode base64 text into a temporary ``.wav`` file.
+
+    Args:
+        base64_string: Base64 text (``str``) containing the file's bytes.
+
+    Returns:
+        Path of the temporary file holding the decoded bytes, or ``None``
+        if decoding or writing fails (the error is logged, not raised).
+        The file is created with ``delete=False``, so this function never
+        removes it on success; the caller has to delete it.
+    """
+    temp_path = None
+    try:
+        # Decode base64
+        audio_bytes = base64.b64decode(base64_string.encode('utf-8'))
+        # The with-block closes the handle even when the write fails.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            temp_path = temp_file.name
+            temp_file.write(audio_bytes)
+        return temp_path
+    except Exception as e:
+        logger.error(f"Error decoding audio: {e}")
+        # Do not leave a partially written file behind on failure.
+        if temp_path is not None:
+            try:
+                os.unlink(temp_path)
+            except OSError:
+                # Best-effort cleanup; the failure is already logged.
+                pass
+        return None
 def load_voice_library():
+    """Rebuild the module-level ``voice_library`` from builtin and saved voices.
+
+    Starts from a copy of ``BUILTIN_VOICES`` and, when
+    ``VOICES_DIR/voices.json`` exists, merges the entries stored in that
+    file on top of it. A summary of the resulting library is logged.
+
+    Any exception raised while reading the file or building the summary is
+    logged and swallowed; nothing is returned.
+    """
     global voice_library
+    # dict.copy() is shallow: only the top-level mapping is duplicated.
     voice_library = BUILTIN_VOICES.copy()
     voices_json_path = os.path.join(VOICES_DIR, "voices.json")
+    try:
+        if os.path.exists(voices_json_path):
             with open(voices_json_path, 'r', encoding='utf-8') as f:
                 custom_voices = json.load(f)
+                # update() lets a saved entry replace a builtin one that
+                # has the same key.
                 voice_library.update(custom_voices)
+            logger.info(f"✅ Loaded {len(custom_voices)} custom voices from persistent storage")
+        else:
+            logger.info("📁 No existing voice library found, starting fresh")
+        # Log voice library status
+        total_voices = len(voice_library)
+        custom_count = len([v for v in voice_library.values() if v.get("type") == "custom"])
+        builtin_count = len([v for v in voice_library.values() if v.get("type") == "builtin"])
+        logger.info(f"📚 Voice Library: {total_voices} total ({builtin_count} builtin, {custom_count} custom)")
+    except Exception as e:
+        # NOTE(review): if the failure happens after update() succeeded
+        # (e.g. while counting), the loaded entries stay in voice_library
+        # even though the message below says builtin voices only.
+        logger.error(f"❌ Error loading voice library: {e}")
+        logger.info("🔄 Starting with builtin voices only")
 def save_voice_library():
+    """Write the non-builtin entries of ``voice_library`` to disk as JSON.
+
+    Every entry whose ``"type"`` is not ``"builtin"`` (including entries
+    with no ``"type"`` key) is dumped to ``VOICES_DIR/voices.json``. The
+    target directory is created if missing, and the size of the written
+    file is logged afterwards.
+
+    Any exception is logged together with the target path and swallowed;
+    nothing is returned.
+    """
     try:
         # Only save custom voices (not builtin)
         custom_voices = {k: v for k, v in voice_library.items() if v.get("type") != "builtin"}
         voices_json_path = os.path.join(VOICES_DIR, "voices.json")
+        # Ensure directory exists
+        os.makedirs(os.path.dirname(voices_json_path), exist_ok=True)
         with open(voices_json_path, 'w', encoding='utf-8') as f:
             json.dump(custom_voices, f, ensure_ascii=False, indent=2)
+        logger.info(f"✅ Saved {len(custom_voices)} custom voices to persistent storage")
+        logger.info(f"📁 Storage location: {voices_json_path}")
+        # Verify the save worked
+        if os.path.exists(voices_json_path):
+            file_size = os.path.getsize(voices_json_path)
+            logger.info(f"📊 Voice library file size: {file_size} bytes")
     except Exception as e:
         logger.error(f"❌ Error saving voice library: {e}")
+        # Recompute the path instead of reading the local variable: when the
+        # failure happens before voices_json_path is assigned (e.g. while
+        # filtering voice_library), the local is unbound and referencing it
+        # here would raise out of the handler.
+        logger.error(f"📁 Attempted path: {os.path.join(VOICES_DIR, 'voices.json')}")
 def create_voice_from_audio(audio_file, voice_name, voice_description="Custom voice"):
+    """Create a new voice from uploaded audio with persistent storage"""
     try:
         voice_id = f"voice_{int(time.time())}_{uuid.uuid4().hex[:8]}"
+        # Handle different audio input formats
         if isinstance(audio_file, tuple):
             # Gradio audio format (sample_rate, audio_data)
             sample_rate, audio_data = audio_file
         else:
+            # File path - load the audio
+            audio_data, sample_rate = sf.read(audio_file)
+        # Encode audio to base64 for persistent storage
+        audio_base64 = encode_audio_to_base64(audio_data, sample_rate)
+        if audio_base64 is None:
+            raise ValueError("Failed to encode audio")
+        # Create voice entry with embedded audio
         voice_entry = {
             "voice_id": voice_id,
             "name": voice_name,
             "description": voice_description,
+            "audio_base64": audio_base64,  # Store audio as base64
+            "sample_rate": int(sample_rate),
             "type": "custom",
+            "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ"),
+            "audio_duration": len(audio_data) / sample_rate
         }
         # Add to voice library
         voice_library[voice_id] = voice_entry
+        # Save to persistent storage
         save_voice_library()
+        logger.info(f"✅ Created persistent voice: {voice_name} ({voice_id})")
+        logger.info(f"🎵 Audio: {len(audio_data)} samples, {sample_rate}Hz, {voice_entry['audio_duration']:.2f}s")
         return voice_id, voice_entry
     except Exception as e:
         return None
 def get_voice_audio_path(voice_id):
+    """Get the audio path for a voice (decode from base64 if custom, download if builtin)"""
     if voice_id not in voice_library:
         return None
     voice_info = voice_library[voice_id]
+    # If it's a custom voice with base64 audio
+    if voice_info.get("type") == "custom" and "audio_base64" in voice_info:
+        # Decode base64 to temporary file
+        temp_path = decode_audio_from_base64(voice_info["audio_base64"])
+        if temp_path:
+            logger.info(f"✅ Decoded custom voice audio: {voice_info['name']}")
+            return temp_path
+        else:
+            logger.warning(f"⚠️ Failed to decode audio for voice {voice_id}")
+            return None
+    # If it's a legacy custom voice with file path (for backward compatibility)
+    elif voice_info.get("type") == "custom" and "audio_path" in voice_info:
         audio_path = voice_info["audio_path"]
         if os.path.exists(audio_path):
             return audio_path
     temp_audio_file = None
     try:
+        # Get audio path for the voice
+        audio_prompt_path = get_voice_audio_path(voice_id)
+        temp_audio_file = None
+        # Check if we got a temporary file (from base64 decode or URL download)
+        if audio_prompt_path and (audio_prompt_path.startswith('/tmp/') or 'temp' in audio_prompt_path):
             temp_audio_file = audio_prompt_path
         if audio_prompt_path:
         logger.error(f"❌ Audio generation failed: {e}")
         raise
     finally:
+        # Clean up temporary file (only if it's a downloaded URL or decoded audio)
+        if temp_audio_file and os.path.exists(temp_audio_file):
             try:
                 os.unlink(temp_audio_file)
                 logger.info(f"🗑️ Cleaned up temporary file: {temp_audio_file}")
         raise HTTPException(status_code=400, detail="Cannot delete builtin voices")
     try:
+        # Delete legacy audio file if it exists
         if "audio_path" in voice_info and os.path.exists(voice_info["audio_path"]):
             os.unlink(voice_info["audio_path"])
         del voice_library[voice_id]
         save_voice_library()
+        logger.info(f"✅ Deleted voice: {voice_name} ({voice_id})")
         return {
             "success": True,
             "message": f"Voice '{voice_name}' deleted successfully"
             voice_name = voice_info["name"]
+            # Delete legacy audio file if it exists
             if "audio_path" in voice_info and os.path.exists(voice_info["audio_path"]):
                 os.unlink(voice_info["audio_path"])
             save_voice_library()
             updated_choices = get_voice_choices()
+            logger.info(f"✅ UI: Deleted voice {voice_name} ({voice_id})")
             return (
                 f"✅ Voice '{voice_name}' deleted successfully",
                 gr.update(choices=updated_choices, value=updated_choices[0][1] if updated_choices else None)
         - **Device**: {DEVICE}
         - **ChatterboxTTS**: {chatterbox_status}
         - **Voice Library**: {len(voice_library)} voices loaded
+        - **Storage**: {"✅ Persistent (/data)" if VOICES_DIR.startswith("/data") else "⚠️ Temporary"}
         - **Generated Files**: {len(audio_cache)}
         {'''### 🎉 Production Ready!
+        Your ChatterboxTTS model is loaded with persistent voice management.''' if CHATTERBOX_AVAILABLE else '''### ⚠️ Action Required
         **You're hearing beep sounds because ChatterboxTTS isn't loaded.**
+        Voice management is working with persistent storage, but you need ChatterboxTTS for real synthesis.'''}
         """)
     return demo