husseinelsaadi committed on
Commit
84b9e0a
·
verified Β·
1 Parent(s): 269d410

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -143,6 +143,8 @@ judge_llm = AutoModelForCausalLM.from_pretrained(
143
  device_map="auto",
144
  token=hf_api_key
145
  )
 
 
146
 
147
  print(judge_llm.hf_device_map)
148
 
@@ -153,10 +155,11 @@ judge_pipeline = pipeline(
153
  max_new_tokens=128,
154
  temperature=0.3,
155
  top_p=0.9,
156
- do_sample=False,
157
  repetition_penalty=1.1,
158
  )
159
 
 
160
  output = judge_pipeline("Q: What is Python?\nA:", max_new_tokens=128)[0]['generated_text']
161
  print(output)
162
 
@@ -1481,25 +1484,22 @@ bark_voice_preset = "v2/en_speaker_5"
1481
 
1482
  def bark_tts(text):
1483
  print(f"πŸ” Synthesizing TTS for: {text}")
1484
- # DON'T pass voice_preset here β€” it will inject generation kwargs internally!
1485
- processed = processor_bark(text, return_tensors="pt")
1486
-
1487
- input_ids = processed["input_ids"].to(model_bark.device)
1488
- attention_mask = processed.get("attention_mask", None)
1489
  if attention_mask is not None:
1490
  attention_mask = attention_mask.to(model_bark.device)
1491
 
1492
  start = time.time()
1493
- # Pass ONLY these manually β€” clean, controlled
1494
  speech_values = model_bark.generate(
1495
  input_ids=input_ids,
1496
  attention_mask=attention_mask,
1497
- pad_token_id=10000,
1498
- max_new_tokens=100,
1499
  )
1500
  print(f"βœ… Bark finished in {round(time.time() - start, 2)}s")
1501
 
1502
- # Post-processing
1503
  speech = speech_values.cpu().numpy().squeeze()
1504
  speech = (speech * 32767).astype(np.int16)
1505
  temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
@@ -1509,6 +1509,7 @@ def bark_tts(text):
1509
 
1510
 
1511
 
 
1512
  # Whisper STT
1513
  print("πŸ” Loading Whisper model...")
1514
  whisper_model = whisper.load_model("base", device="cuda")
 
143
  device_map="auto",
144
  token=hf_api_key
145
  )
146
+ judge_llm.config.pad_token_id = judge_llm.config.eos_token_id
147
+
148
 
149
  print(judge_llm.hf_device_map)
150
 
 
155
  max_new_tokens=128,
156
  temperature=0.3,
157
  top_p=0.9,
158
+ do_sample=True, # Optional but recommended with temperature/top_p
159
  repetition_penalty=1.1,
160
  )
161
 
162
+
163
  output = judge_pipeline("Q: What is Python?\nA:", max_new_tokens=128)[0]['generated_text']
164
  print(output)
165
 
 
1484
 
1485
  def bark_tts(text):
1486
  print(f"πŸ” Synthesizing TTS for: {text}")
1487
+ inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1488
+ input_ids = inputs["input_ids"].to(model_bark.device)
1489
+ attention_mask = inputs.get("attention_mask", None)
 
 
1490
  if attention_mask is not None:
1491
  attention_mask = attention_mask.to(model_bark.device)
1492
 
1493
  start = time.time()
1494
+ # βœ… DO NOT use **inputs here to avoid duplicate keys
1495
  speech_values = model_bark.generate(
1496
  input_ids=input_ids,
1497
  attention_mask=attention_mask,
1498
+ max_new_tokens=100, # Only here
1499
+ pad_token_id=model_bark.config.eos_token_id # Optional
1500
  )
1501
  print(f"βœ… Bark finished in {round(time.time() - start, 2)}s")
1502
 
 
1503
  speech = speech_values.cpu().numpy().squeeze()
1504
  speech = (speech * 32767).astype(np.int16)
1505
  temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
 
1509
 
1510
 
1511
 
1512
+
1513
  # Whisper STT
1514
  print("πŸ” Loading Whisper model...")
1515
  whisper_model = whisper.load_model("base", device="cuda")