Update app.py
Browse files
app.py
CHANGED
@@ -143,6 +143,8 @@ judge_llm = AutoModelForCausalLM.from_pretrained(
|
|
143 |
device_map="auto",
|
144 |
token=hf_api_key
|
145 |
)
|
|
|
|
|
146 |
|
147 |
print(judge_llm.hf_device_map)
|
148 |
|
@@ -153,10 +155,11 @@ judge_pipeline = pipeline(
|
|
153 |
max_new_tokens=128,
|
154 |
temperature=0.3,
|
155 |
top_p=0.9,
|
156 |
-
do_sample=
|
157 |
repetition_penalty=1.1,
|
158 |
)
|
159 |
|
|
|
160 |
output = judge_pipeline("Q: What is Python?\nA:", max_new_tokens=128)[0]['generated_text']
|
161 |
print(output)
|
162 |
|
@@ -1481,25 +1484,22 @@ bark_voice_preset = "v2/en_speaker_5"
|
|
1481 |
|
1482 |
def bark_tts(text):
|
1483 |
print(f"🔊 Synthesizing TTS for: {text}")
|
1484 |
-
|
1485 |
-
|
1486 |
-
|
1487 |
-
input_ids = processed["input_ids"].to(model_bark.device)
|
1488 |
-
attention_mask = processed.get("attention_mask", None)
|
1489 |
if attention_mask is not None:
|
1490 |
attention_mask = attention_mask.to(model_bark.device)
|
1491 |
|
1492 |
start = time.time()
|
1493 |
-
#
|
1494 |
speech_values = model_bark.generate(
|
1495 |
input_ids=input_ids,
|
1496 |
attention_mask=attention_mask,
|
1497 |
-
|
1498 |
-
|
1499 |
)
|
1500 |
print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
|
1501 |
|
1502 |
-
# Post-processing
|
1503 |
speech = speech_values.cpu().numpy().squeeze()
|
1504 |
speech = (speech * 32767).astype(np.int16)
|
1505 |
temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
|
@@ -1509,6 +1509,7 @@ def bark_tts(text):
|
|
1509 |
|
1510 |
|
1511 |
|
|
|
1512 |
# Whisper STT
|
1513 |
print("🔄 Loading Whisper model...")
|
1514 |
whisper_model = whisper.load_model("base", device="cuda")
|
|
|
143 |
device_map="auto",
|
144 |
token=hf_api_key
|
145 |
)
|
146 |
+
judge_llm.config.pad_token_id = judge_llm.config.eos_token_id
|
147 |
+
|
148 |
|
149 |
print(judge_llm.hf_device_map)
|
150 |
|
|
|
155 |
max_new_tokens=128,
|
156 |
temperature=0.3,
|
157 |
top_p=0.9,
|
158 |
+
do_sample=True, # Optional but recommended with temperature/top_p
|
159 |
repetition_penalty=1.1,
|
160 |
)
|
161 |
|
162 |
+
|
163 |
output = judge_pipeline("Q: What is Python?\nA:", max_new_tokens=128)[0]['generated_text']
|
164 |
print(output)
|
165 |
|
|
|
1484 |
|
1485 |
def bark_tts(text):
|
1486 |
print(f"🔊 Synthesizing TTS for: {text}")
|
1487 |
+
inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
|
1488 |
+
input_ids = inputs["input_ids"].to(model_bark.device)
|
1489 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
|
1490 |
if attention_mask is not None:
|
1491 |
attention_mask = attention_mask.to(model_bark.device)
|
1492 |
|
1493 |
start = time.time()
|
1494 |
+
# ✅ DO NOT use **inputs here to avoid duplicate keys
|
1495 |
speech_values = model_bark.generate(
|
1496 |
input_ids=input_ids,
|
1497 |
attention_mask=attention_mask,
|
1498 |
+
max_new_tokens=100, # Only here
|
1499 |
+
pad_token_id=model_bark.config.eos_token_id # Optional
|
1500 |
)
|
1501 |
print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
|
1502 |
|
|
|
1503 |
speech = speech_values.cpu().numpy().squeeze()
|
1504 |
speech = (speech * 32767).astype(np.int16)
|
1505 |
temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
|
|
|
1509 |
|
1510 |
|
1511 |
|
1512 |
+
|
1513 |
# Whisper STT
|
1514 |
print("🔄 Loading Whisper model...")
|
1515 |
whisper_model = whisper.load_model("base", device="cuda")
|