Update app.py
app.py CHANGED
@@ -15,7 +15,7 @@ torch.backends.cuda.enable_flash_sdp(False)  # PyTorch 2.2 bug
 
 # 1) Constants ---------------------------------------------------------
 REPO = "SebastianBodza/Kartoffel_Orpheus-3B_german_natural-v0.1"
-CHUNK_TOKENS =
+CHUNK_TOKENS = 7
 START_TOKEN = 128259
 NEW_BLOCK = 128257
 EOS_TOKEN = 128258
@@ -108,16 +108,16 @@ async def tts(ws: WebSocket):
 
     while True:
         next_cache_pos = torch.tensor([offset_len], device=device) if past is not None else None
-
         gen = model.generate(
             input_ids = ids if past is None else torch.tensor([[last_tok]], device=device),
             attention_mask = attn if past is None else None,
             past_key_values = past,
-            cache_position =
+            cache_position = None if past is None else next_cache_pos,
             max_new_tokens = CHUNK_TOKENS,
             logits_processor=[masker],
             do_sample=True, temperature=0.7, top_p=0.95,
             use_cache=True, return_dict_in_generate=True,
+            return_legacy_cache=False
         )
 
         # newly generated tokens after the previous end
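
What the second hunk patches is chunked generation with a reused KV cache: the first generate call pre-fills the cache from the full prompt, and every later call feeds in only the last sampled token together with an explicit cache_position, so the model attends over the cached keys/values instead of re-encoding the prompt. The following is a minimal sketch of that loop, not the app's actual code: gpt2 stands in for the Orpheus repo, the prompt and chunk count are illustrative, the seq_len bookkeeping is my assumption about what offset_len tracks, and it presumes a transformers version whose generate accepts a user-supplied cache_position and return_legacy_cache, as the diff itself does.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
tok = AutoTokenizer.from_pretrained("gpt2")                 # stand-in model, illustrative only
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

CHUNK_TOKENS = 7
enc = tok("Hello", return_tensors="pt").to(device)
ids, attn = enc.input_ids, enc.attention_mask

past, last_tok = None, None
seq_len = ids.shape[1]                                      # tokens in the sequence so far

for _ in range(3):                                          # three chunks, for illustration
    # The last sampled token has not been through a forward pass yet,
    # so its cache slot is seq_len - 1.
    next_cache_pos = torch.tensor([seq_len - 1], device=device) if past is not None else None
    inp_len = ids.shape[1] if past is None else 1
    gen = model.generate(
        input_ids=ids if past is None else torch.tensor([[last_tok]], device=device),
        attention_mask=attn if past is None else None,
        past_key_values=past,                               # reuse the KV cache across chunks
        cache_position=next_cache_pos,                      # None on the first (pre-fill) call
        max_new_tokens=CHUNK_TOKENS,
        do_sample=True, temperature=0.7, top_p=0.95,
        use_cache=True, return_dict_in_generate=True,
        return_legacy_cache=False,                          # keep a Cache object, not legacy tuples
    )
    new_toks = gen.sequences[0, inp_len:].tolist()          # may be < CHUNK_TOKENS if EOS hit
    if not new_toks:                                        # stopped immediately
        break
    past = gen.past_key_values
    last_tok = new_toks[-1]
    seq_len += len(new_toks)
    print(tok.decode(new_toks))

Keeping return_legacy_cache=False means the returned past_key_values stays a Cache object rather than being converted back to legacy tuples, which is what the explicit cache_position arithmetic relies on.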
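The masker passed via logits_processor is defined outside these hunks. For reference, a custom logits processor for this kind of constrained decoding can look like the sketch below; the class name, constructor, and allowed id range are hypothetical, and only the NEW_BLOCK/EOS_TOKEN ids come from the diff's constants.

import torch
from transformers import LogitsProcessor

class TokenRangeMasker(LogitsProcessor):                    # hypothetical stand-in for `masker`
    def __init__(self, lo: int, hi: int, extra_ids=(128257, 128258)):  # NEW_BLOCK, EOS_TOKEN
        self.lo, self.hi = lo, hi                           # allowed (assumed audio-code) id range
        self.extra_ids = extra_ids

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        mask = torch.full_like(scores, float("-inf"))
        mask[:, self.lo : self.hi + 1] = 0.0                # keep the allowed range
        mask[:, list(self.extra_ids)] = 0.0                 # control tokens stay available
        return scores + mask

masker = TokenRangeMasker(lo=128266, hi=156938)             # range is illustrative, not from the diff

An instance would be passed as logits_processor=[masker], exactly as in the generate call above.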