Tomtom84 committed on
Commit
14f1558
·
verified ·
1 Parent(s): 7bb84b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -104,19 +104,19 @@ async def tts(ws: WebSocket):
104
  offset_len = ids.size(1) # wie viele Tokens existieren schon
105
  last_tok = None
106
  buf = []
107
- # masker.buffer_pos = 0 # Removed initialization here
108
  # Removed buffer_pos update before generation
109
 
110
  while True:
111
- # --- Mini‑Generate (Cache Disabled for Debugging) -------------------------------------------
112
  gen = model.generate(
113
- input_ids = ids, # Always use full sequence
114
- attention_mask = attn, # Always use full attention mask
115
- # past_key_values= past, # Disabled cache
116
  max_new_tokens = CHUNK_TOKENS,
117
  logits_processor=[masker],
118
  do_sample=True, temperature=0.7, top_p=0.95,
119
- use_cache=False, # Disabled cache
120
  return_dict_in_generate=True,
121
  return_legacy_cache=True
122
  )
@@ -128,10 +128,10 @@ async def tts(ws: WebSocket):
128
  break
129
  offset_len += len(new)
130
 
131
- # ----- Update ids and attn with the full sequence (Cache Disabled) ---------
132
- ids = torch.tensor([seq], device=device) # Re-added
133
- attn = torch.ones_like(ids) # Re-added
134
- # past = gen.past_key_values # Disabled cache access
135
  last_tok = new[-1]
136
 
137
  print("new tokens:", new[:25], flush=True)
@@ -146,12 +146,12 @@ async def tts(ws: WebSocket):
146
  # Only append if it's an audio token
147
  # Only append if it's an audio token
148
  buf.append(t - AUDIO_BASE) # Reverted to appending relative token
149
- # masker.buffer_pos += 1 # Removed increment here
150
  if len(buf) == 7:
151
  await ws.send_bytes(decode_block(buf))
152
  buf.clear()
153
  masker.sent_blocks = 1 # ab jetzt EOS zulässig
154
- # masker.buffer_pos = 0 # Removed reset here
155
  # Removed else block for skipping non-audio tokens
156
 
157
  except (StopIteration, WebSocketDisconnect):
 
104
  offset_len = ids.size(1) # wie viele Tokens existieren schon
105
  last_tok = None
106
  buf = []
107
+ masker.buffer_pos = 0 # Re-added: reset buffer position before generation
108
  # Removed buffer_pos update before generation
109
 
110
  while True:
111
+ # --- Mini‑Generate (Cache Re-enabled) -------------------------------------------
112
  gen = model.generate(
113
+ input_ids = ids if past is None else torch.tensor([[last_tok]], device=device), # Re-enabled cache input
114
+ attention_mask = attn if past is None else None, # Re-enabled cache attention
115
+ past_key_values = past, # Re-enabled cache
116
  max_new_tokens = CHUNK_TOKENS,
117
  logits_processor=[masker],
118
  do_sample=True, temperature=0.7, top_p=0.95,
119
+ use_cache=True, # Re-enabled cache
120
  return_dict_in_generate=True,
121
  return_legacy_cache=True
122
  )
 
128
  break
129
  offset_len += len(new)
130
 
131
+ # ----- Update past and last_tok (Cache Re-enabled) ---------
132
+ # ids = torch.tensor([seq], device=device) # Removed full sequence update
133
+ # attn = torch.ones_like(ids) # Removed full sequence update
134
+ past = gen.past_key_values # Re-enabled cache update
135
  last_tok = new[-1]
136
 
137
  print("new tokens:", new[:25], flush=True)
 
146
  # Only append if it's an audio token
147
  # Only append if it's an audio token
148
  buf.append(t - AUDIO_BASE) # Reverted to appending relative token
149
+ masker.buffer_pos += 1 # Re-added: advance buffer position per audio token
150
  if len(buf) == 7:
151
  await ws.send_bytes(decode_block(buf))
152
  buf.clear()
153
  masker.sent_blocks = 1 # ab jetzt EOS zulässig
154
+ masker.buffer_pos = 0 # Re-added: reset buffer position after a full 7-token block
155
  # Removed else block for skipping non-audio tokens
156
 
157
  except (StopIteration, WebSocketDisconnect):