Update app.py
Browse files
app.py
CHANGED
@@ -1481,9 +1481,9 @@ bark_voice_preset = "v2/en_speaker_5"
|
|
1481 |
|
1482 |
def bark_tts(text):
|
1483 |
print(f"π Synthesizing TTS for: {text}")
|
1484 |
-
|
1485 |
-
input_ids =
|
1486 |
-
attention_mask =
|
1487 |
if attention_mask is not None:
|
1488 |
attention_mask = attention_mask.to(model_bark.device)
|
1489 |
|
@@ -1491,8 +1491,8 @@ def bark_tts(text):
|
|
1491 |
speech_values = model_bark.generate(
|
1492 |
input_ids=input_ids,
|
1493 |
attention_mask=attention_mask,
|
1494 |
-
|
1495 |
-
|
1496 |
)
|
1497 |
print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
|
1498 |
|
@@ -1503,6 +1503,7 @@ def bark_tts(text):
|
|
1503 |
return temp_wav.name
|
1504 |
|
1505 |
|
|
|
1506 |
# Whisper STT
|
1507 |
print("π Loading Whisper model...")
|
1508 |
whisper_model = whisper.load_model("base", device="cuda")
|
|
|
1481 |
|
1482 |
def bark_tts(text):
|
1483 |
print(f"π Synthesizing TTS for: {text}")
|
1484 |
+
processed = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
|
1485 |
+
input_ids = processed["input_ids"].to(model_bark.device)
|
1486 |
+
attention_mask = processed.get("attention_mask", None)
|
1487 |
if attention_mask is not None:
|
1488 |
attention_mask = attention_mask.to(model_bark.device)
|
1489 |
|
|
|
1491 |
speech_values = model_bark.generate(
|
1492 |
input_ids=input_ids,
|
1493 |
attention_mask=attention_mask,
|
1494 |
+
pad_token_id=10000, # Optional safety
|
1495 |
+
max_new_tokens=100 # ✅ Passed once only here
|
1496 |
)
|
1497 |
print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
|
1498 |
|
|
|
1503 |
return temp_wav.name
|
1504 |
|
1505 |
|
1506 |
+
|
1507 |
# Whisper STT
|
1508 |
print("π Loading Whisper model...")
|
1509 |
whisper_model = whisper.load_model("base", device="cuda")
|