Update app.py
Browse files
app.py
CHANGED
@@ -73,7 +73,6 @@ class TTSModelWrapper:
|
|
73 |
def load_tts_model_with_retry(max_retries=3, retry_delay=5):
|
74 |
global tts_model, tts_model_wrapper
|
75 |
|
76 |
-
# First, check if model is already in cache
|
77 |
print("Checking if TTS model is in cache...")
|
78 |
try:
|
79 |
cache_info = scan_cache_dir()
|
@@ -83,15 +82,15 @@ def load_tts_model_with_retry(max_retries=3, retry_delay=5):
|
|
83 |
tts_model = AutoModel.from_pretrained(
|
84 |
tts_repo_id,
|
85 |
trust_remote_code=True,
|
86 |
-
local_files_only=True
|
87 |
-
|
|
|
88 |
tts_model_wrapper = TTSModelWrapper(tts_model)
|
89 |
print("TTS model loaded from cache successfully!")
|
90 |
return
|
91 |
except Exception as e:
|
92 |
print(f"Cache check failed: {e}")
|
93 |
|
94 |
-
# If not in cache or cache check failed, try loading with retries
|
95 |
for attempt in range(max_retries):
|
96 |
try:
|
97 |
print(f"Loading {tts_repo_id} model (attempt {attempt+1}/{max_retries})...")
|
@@ -100,21 +99,19 @@ def load_tts_model_with_retry(max_retries=3, retry_delay=5):
|
|
100 |
trust_remote_code=True,
|
101 |
revision="main",
|
102 |
use_auth_token=HF_TOKEN,
|
103 |
-
low_cpu_mem_usage=True
|
104 |
-
|
105 |
-
|
106 |
tts_model_wrapper = TTSModelWrapper(tts_model)
|
107 |
print(f"TTS model loaded successfully! Type: {type(tts_model)}")
|
108 |
-
return
|
109 |
-
|
110 |
except Exception as e:
|
111 |
print(f"⚠️ Attempt {attempt+1}/{max_retries} failed: {e}")
|
112 |
if attempt < max_retries - 1:
|
113 |
print(f"Waiting {retry_delay} seconds before retrying...")
|
114 |
time.sleep(retry_delay)
|
115 |
-
retry_delay *= 1.5
|
116 |
|
117 |
-
# If all attempts failed, try one last time with fallback options
|
118 |
try:
|
119 |
print("Trying with fallback options...")
|
120 |
tts_model = AutoModel.from_pretrained(
|
@@ -124,14 +121,53 @@ def load_tts_model_with_retry(max_retries=3, retry_delay=5):
|
|
124 |
local_files_only=False,
|
125 |
use_auth_token=HF_TOKEN,
|
126 |
force_download=False,
|
127 |
-
resume_download=True
|
128 |
-
|
|
|
129 |
tts_model_wrapper = TTSModelWrapper(tts_model)
|
130 |
print("TTS model loaded with fallback options!")
|
131 |
except Exception as e2:
|
132 |
print(f"❌ All attempts to load TTS model failed: {e2}")
|
133 |
print("Will continue without TTS model loaded.")
|
134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
def load_asr_model():
|
136 |
global asr_model
|
137 |
try:
|
@@ -362,7 +398,7 @@ def enhance_audio(audio_data):
|
|
362 |
|
363 |
return audio_data
|
364 |
|
365 |
-
def split_into_chunks(text, max_length=30):
|
366 |
"""Split text into smaller chunks based on punctuation and length"""
|
367 |
# First split by sentences
|
368 |
sentence_markers = ['.', '?', '!', ';', ':', '।', '॥']
|
|
|
73 |
def load_tts_model_with_retry(max_retries=3, retry_delay=5):
|
74 |
global tts_model, tts_model_wrapper
|
75 |
|
|
|
76 |
print("Checking if TTS model is in cache...")
|
77 |
try:
|
78 |
cache_info = scan_cache_dir()
|
|
|
82 |
tts_model = AutoModel.from_pretrained(
|
83 |
tts_repo_id,
|
84 |
trust_remote_code=True,
|
85 |
+
local_files_only=True,
|
86 |
+
device_map="auto" # <-- Use device_map instead of .to(device)
|
87 |
+
)
|
88 |
tts_model_wrapper = TTSModelWrapper(tts_model)
|
89 |
print("TTS model loaded from cache successfully!")
|
90 |
return
|
91 |
except Exception as e:
|
92 |
print(f"Cache check failed: {e}")
|
93 |
|
|
|
94 |
for attempt in range(max_retries):
|
95 |
try:
|
96 |
print(f"Loading {tts_repo_id} model (attempt {attempt+1}/{max_retries})...")
|
|
|
99 |
trust_remote_code=True,
|
100 |
revision="main",
|
101 |
use_auth_token=HF_TOKEN,
|
102 |
+
low_cpu_mem_usage=True,
|
103 |
+
device_map="auto" # <-- Use device_map here as well
|
104 |
+
)
|
105 |
tts_model_wrapper = TTSModelWrapper(tts_model)
|
106 |
print(f"TTS model loaded successfully! Type: {type(tts_model)}")
|
107 |
+
return
|
|
|
108 |
except Exception as e:
|
109 |
print(f"⚠️ Attempt {attempt+1}/{max_retries} failed: {e}")
|
110 |
if attempt < max_retries - 1:
|
111 |
print(f"Waiting {retry_delay} seconds before retrying...")
|
112 |
time.sleep(retry_delay)
|
113 |
+
retry_delay *= 1.5
|
114 |
|
|
|
115 |
try:
|
116 |
print("Trying with fallback options...")
|
117 |
tts_model = AutoModel.from_pretrained(
|
|
|
121 |
local_files_only=False,
|
122 |
use_auth_token=HF_TOKEN,
|
123 |
force_download=False,
|
124 |
+
resume_download=True,
|
125 |
+
device_map="auto" # <-- And here too
|
126 |
+
)
|
127 |
tts_model_wrapper = TTSModelWrapper(tts_model)
|
128 |
print("TTS model loaded with fallback options!")
|
129 |
except Exception as e2:
|
130 |
print(f"❌ All attempts to load TTS model failed: {e2}")
|
131 |
print("Will continue without TTS model loaded.")
|
132 |
|
133 |
+
# Reduce chunk size for faster streaming and lower latency
def split_into_chunks(text, max_length=15):  # Reduced from 30 to 15
    """Split *text* into small chunks for low-latency TTS streaming.

    The text is first cut at sentence-ending punctuation (including the
    Devanagari danda '।' and double danda '॥'); any sentence longer than
    ``max_length`` characters is then further split at commas, greedily
    packing comma-separated parts back together up to ``max_length``.

    Args:
        text: Input text to split. Empty or whitespace-only text yields [].
        max_length: Soft cap (in characters) for each emitted chunk.
            Sentence fragments shorter than this are kept whole; a single
            comma-free part longer than this is still emitted unsplit.

    Returns:
        list[str]: Ordered, stripped text chunks.
    """
    # NOTE(review): a second `def split_into_chunks(text, max_length=20)`
    # appears later in this file (around line 401) and would shadow this
    # definition at import time — confirm which one is intended and delete
    # the other.
    sentence_markers = ['.', '?', '!', ';', ':', '।', '॥']
    chunks = []
    current = ""

    # Pass 1: cut at sentence-ending punctuation.
    for char in text:
        current += char
        if char in sentence_markers and current.strip():
            chunks.append(current.strip())
            current = ""

    # Trailing text with no terminal punctuation still forms a chunk.
    if current.strip():
        chunks.append(current.strip())

    # Pass 2: break oversized sentences at commas, greedily re-packing
    # parts so each emitted chunk stays within max_length where possible.
    final_chunks = []
    for chunk in chunks:
        if len(chunk) <= max_length:
            final_chunks.append(chunk)
        else:
            comma_splits = chunk.split(',')
            current_part = ""
            for part in comma_splits:
                if len(current_part) + len(part) <= max_length:
                    if current_part:
                        current_part += ","
                    current_part += part
                else:
                    if current_part:
                        final_chunks.append(current_part.strip())
                    current_part = part
            if current_part:
                final_chunks.append(current_part.strip())

    print(f"Split text into {len(final_chunks)} chunks")
    return final_chunks
    # BUGFIX: removed a stray `)` that followed this function in the
    # committed diff (after-view line 169) — it was a SyntaxError that
    # prevented the module from importing at all.
|
170 |
+
|
171 |
def load_asr_model():
|
172 |
global asr_model
|
173 |
try:
|
|
|
398 |
|
399 |
return audio_data
|
400 |
|
401 |
+
def split_into_chunks(text, max_length=20):
|
402 |
"""Split text into smaller chunks based on punctuation and length"""
|
403 |
# First split by sentences
|
404 |
sentence_markers = ['.', '?', '!', ';', ':', '।', '॥']
|