Spaces:

husseinelsaadi
/

ai-interviewer-demo

Paused

husseinelsaadi committed 1 day ago

Commit

c5f793f

verified ·

1 Parent(s): 1751c99

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1751,7 +1751,7 @@ gc.collect()
 model_bark = None
 processor_bark = None
 whisper_model = None
-bark_voice_preset = "v2/en_speaker_1"
 # Thread pool for async operations
 executor = ThreadPoolExecutor(max_workers=2)
@@ -1788,31 +1788,31 @@ def load_models_lazy():
         print(f"✅ Whisper model loaded on {device}")
 def bark_tts_async(text):
-    """Async TTS generation"""
     def _generate():
-        load_models_lazy()  # Load only when needed
-        print(f"🔁 Synthesizing TTS for: {text}")
-        # Ensure we're using the correct device
         device = next(model_bark.parameters()).device
-        print(f"🔁 Bark model is on device: {device}")
         inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
-        input_ids = inputs["input_ids"].to(device)  # Move to same device as model
-        start = time.time()
         with torch.no_grad():
-            speech_values = model_bark.generate(input_ids=input_ids)
-        print(f"✅ Bark finished in {round(time.time() - start, 2)}s on {device}")
         speech = speech_values.cpu().numpy().squeeze()
         speech = (speech * 32767).astype(np.int16)
         temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
         wavfile.write(temp_wav.name, 22050, speech)
         return temp_wav.name
     return executor.submit(_generate)
 def whisper_stt(audio_path):
     """Lazy loading whisper STT"""
     if not audio_path or not os.path.exists(audio_path):

 model_bark = None
 processor_bark = None
 whisper_model = None
+bark_voice_preset = "v2/en_speaker_6"
 # Thread pool for async operations
 executor = ThreadPoolExecutor(max_workers=2)
         print(f"✅ Whisper model loaded on {device}")
 def bark_tts_async(text):
     """Queue Bark text-to-speech for ``text`` on the shared thread pool.

     Args:
         text: The sentence(s) to speak.

     Returns:
         The future returned by ``executor.submit``. Its result is the path
         of a temporary ``.wav`` file containing 16-bit PCM audio. The file
         is created with ``delete=False``, so the caller is responsible for
         removing it once it has been played.
     """
     def _generate():
         load_models_lazy()
         device = next(model_bark.parameters()).device
         print(f"🔁 Bark model on: {device}")
         print(f"🎙️ Speaking: {text}")
         # Keep everything the processor returns (not just input_ids) and move
         # each entry to the model's device before unpacking into generate().
         inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
         inputs = {k: v.to(device) for k, v in inputs.items()}
         with torch.no_grad():
             speech_values = model_bark.generate(**inputs)
         speech = speech_values.cpu().numpy().squeeze()
         # Clamp before scaling: samples outside [-1, 1] would otherwise wrap
         # around when cast to int16 and show up as loud clicks.
         speech = (np.clip(speech, -1.0, 1.0) * 32767).astype(np.int16)
         # Write at the rate the model is configured for rather than a
         # hard-coded 22050 Hz (a mismatch plays back at the wrong speed and
         # pitch). Fall back to the previous value if the config has no rate.
         generation_config = getattr(model_bark, "generation_config", None)
         sample_rate = getattr(generation_config, "sample_rate", 22050)
         # Reserve the path, then close the handle before writing by name so
         # the file descriptor is not leaked on every call.
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
             wav_path = temp_wav.name
         wavfile.write(wav_path, sample_rate, speech)
         return wav_path
     return executor.submit(_generate)
 def whisper_stt(audio_path):
     """Lazy loading whisper STT"""
     if not audio_path or not os.path.exists(audio_path):