Update app.py
Browse files
app.py
CHANGED
@@ -143,6 +143,8 @@ judge_llm = AutoModelForCausalLM.from_pretrained(
|
|
143 |
device_map="auto",
|
144 |
token=hf_api_key
|
145 |
)
|
|
|
|
|
146 |
|
147 |
print(judge_llm.hf_device_map)
|
148 |
|
@@ -153,10 +155,11 @@ judge_pipeline = pipeline(
|
|
153 |
max_new_tokens=128,
|
154 |
temperature=0.3,
|
155 |
top_p=0.9,
|
156 |
-
do_sample=
|
157 |
repetition_penalty=1.1,
|
158 |
)
|
159 |
|
|
|
160 |
output = judge_pipeline("Q: What is Python?\nA:", max_new_tokens=128)[0]['generated_text']
|
161 |
print(output)
|
162 |
|
@@ -1481,25 +1484,22 @@ bark_voice_preset = "v2/en_speaker_5"
|
|
1481 |
|
1482 |
def bark_tts(text):
|
1483 |
print(f"🔊 Synthesizing TTS for: {text}")
|
1484 |
-
|
1485 |
-
|
1486 |
-
|
1487 |
-
input_ids = processed["input_ids"].to(model_bark.device)
|
1488 |
-
attention_mask = processed.get("attention_mask", None)
|
1489 |
if attention_mask is not None:
|
1490 |
attention_mask = attention_mask.to(model_bark.device)
|
1491 |
|
1492 |
start = time.time()
|
1493 |
-
#
|
1494 |
speech_values = model_bark.generate(
|
1495 |
input_ids=input_ids,
|
1496 |
attention_mask=attention_mask,
|
1497 |
-
|
1498 |
-
|
1499 |
)
|
1500 |
print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
|
1501 |
|
1502 |
-
# Post-processing
|
1503 |
speech = speech_values.cpu().numpy().squeeze()
|
1504 |
speech = (speech * 32767).astype(np.int16)
|
1505 |
temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
|
@@ -1509,6 +1509,7 @@ def bark_tts(text):
|
|
1509 |
|
1510 |
|
1511 |
|
|
|
1512 |
# Whisper STT
|
1513 |
print("🔄 Loading Whisper model...")
|
1514 |
whisper_model = whisper.load_model("base", device="cuda")
|
|
|
143 |
device_map="auto",
|
144 |
token=hf_api_key
|
145 |
)
|
146 |
+
judge_llm.config.pad_token_id = judge_llm.config.eos_token_id
|
147 |
+
|
148 |
|
149 |
print(judge_llm.hf_device_map)
|
150 |
|
|
|
155 |
max_new_tokens=128,
|
156 |
temperature=0.3,
|
157 |
top_p=0.9,
|
158 |
+
do_sample=True, # Optional but recommended with temperature/top_p
|
159 |
repetition_penalty=1.1,
|
160 |
)
|
161 |
|
162 |
+
|
163 |
output = judge_pipeline("Q: What is Python?\nA:", max_new_tokens=128)[0]['generated_text']
|
164 |
print(output)
|
165 |
|
|
|
1484 |
|
1485 |
def bark_tts(text):
|
1486 |
print(f"🔊 Synthesizing TTS for: {text}")
|
1487 |
+
inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
|
1488 |
+
input_ids = inputs["input_ids"].to(model_bark.device)
|
1489 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
|
1490 |
if attention_mask is not None:
|
1491 |
attention_mask = attention_mask.to(model_bark.device)
|
1492 |
|
1493 |
start = time.time()
|
1494 |
+
# ✅ DO NOT use **inputs here to avoid duplicate keys
|
1495 |
speech_values = model_bark.generate(
|
1496 |
input_ids=input_ids,
|
1497 |
attention_mask=attention_mask,
|
1498 |
+
max_new_tokens=100, # Only here
|
1499 |
+
pad_token_id=model_bark.config.eos_token_id # Optional
|
1500 |
)
|
1501 |
print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
|
1502 |
|
|
|
1503 |
speech = speech_values.cpu().numpy().squeeze()
|
1504 |
speech = (speech * 32767).astype(np.int16)
|
1505 |
temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
|
|
|
1509 |
|
1510 |
|
1511 |
|
1512 |
+
|
1513 |
# Whisper STT
|
1514 |
print("🔄 Loading Whisper model...")
|
1515 |
whisper_model = whisper.load_model("base", device="cuda")
|