Update app.py
app.py CHANGED
@@ -107,73 +107,87 @@ async def tts(ws: WebSocket):
         last_tok = None
         buf = []

+        # Initial generation step using model.generate
+        with torch.no_grad():
+            gen = model.generate(
+                input_ids = ids,
+                attention_mask = attn,
+                past_key_values = None, # Initial call, no past cache
+                max_new_tokens = 1,
+                logits_processor=[masker],
+                do_sample=True, temperature=0.7, top_p=0.95,
+                use_cache=True,
+                return_dict_in_generate=True,
+            )
+
+        # Get the initial cache and last token
+        past = gen.past_key_values
+        if isinstance(past, tuple):
+            past = DynamicCache.from_legacy_cache(past) # Convert legacy tuple cache
+        last_tok = gen.sequences[0].tolist()[-1]
+        offset_len += 1 # Increment offset for the first generated token
+
+        print(f"DEBUG: After initial generate - type of past: {type(past)}", flush=True) # Added logging
+        print("new tokens:", [last_tok], flush=True) # Log the first token
+
+        # Handle the first generated token
+        if last_tok == EOS_TOKEN:
+            raise StopIteration
+        if last_tok == NEW_BLOCK:
+            buf.clear()
+        else:
+            buf.append(last_tok - AUDIO_BASE)
+            if len(buf) == 7:
+                await ws.send_bytes(decode_block(buf))
+                buf.clear()
+                masker.sent_blocks = 1
+
+        # Manual generation loop for subsequent tokens
         while True:
-            print(f"DEBUG: Before
-
-
-
-
-
-
-
-
-
-
-
+            print(f"DEBUG: Before forward - type of past: {type(past)}", flush=True) # Added logging
+
+            # Prepare inputs for the next token
+            current_input_ids = torch.tensor([[last_tok]], device=device)
+            current_attention_mask = torch.ones_like(current_input_ids)
+            current_cache_position = torch.tensor([offset_len], device=device)
+
+            # Perform forward pass
+            with torch.no_grad():
+                outputs = model(
+                    input_ids=current_input_ids,
+                    attention_mask=current_attention_mask,
+                    past_key_values=past,
+                    cache_position=current_cache_position,
                     use_cache=True,
-
-                )
-            else:
-                # Subsequent generation steps
-                current_input_ids = torch.tensor([[last_tok]], device=device)
-                current_attention_mask = torch.ones_like(current_input_ids)
-                gen = model.generate(
-                    input_ids = current_input_ids,
-                    attention_mask = current_attention_mask,
-                    past_key_values = past, # This will be a Cache object
-                    max_new_tokens = 1,
-                    logits_processor=[masker],
-                    do_sample=True, temperature=0.7, top_p=0.95,
-                    use_cache=True,
-                    return_dict_in_generate=True,
-                    cache_position=torch.tensor([offset_len], device=device) # Explicitly pass cache_position
-                )
-
-            print(f"DEBUG: After generate - type of gen.past_key_values: {type(gen.past_key_values)}", flush=True) # Added logging)
+                )

-            #
-
-
-
-
-            past = gen.past_key_values
+            # Sample the next token (greedy sampling)
+            next_token_logits = outputs.logits[:, -1, :]
+            # Apply logits processor manually
+            processed_logits = masker(current_input_ids, next_token_logits.unsqueeze(0))[0]
+            next_token_id = torch.argmax(processed_logits).item()

-            print(f"DEBUG: After
+            print(f"DEBUG: After forward - type of outputs.past_key_values: {type(outputs.past_key_values)}", flush=True) # Added logging

-            #
-
-
-
-                break
-            offset_len += len(new)
+            # Update cache and last token
+            past = outputs.past_key_values
+            last_tok = next_token_id
+            offset_len += 1 # Increment offset for the new token

-            #
-            last_tok = new[-1]
-
-            print("new tokens:", new[:25], flush=True)
+            print("new tokens:", [last_tok], flush=True) # Log the new token

             # ----- Token‑Handling ----------------------------------------
-
-
-
-
-
-
-
-
-
-
-
+            if last_tok == EOS_TOKEN:
+                raise StopIteration
+            if last_tok == NEW_BLOCK:
+                buf.clear()
+                continue # Continue loop to generate the next token
+            buf.append(last_tok - AUDIO_BASE)
+            if len(buf) == 7:
+                await ws.send_bytes(decode_block(buf))
+                buf.clear()
+                masker.sent_blocks = 1 # EOS allowed from now on
+

     except (StopIteration, WebSocketDisconnect):
         pass
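The change replaces the per-token `model.generate` call inside the loop with one warm-up `generate` followed by plain `model(...)` forward passes that reuse the KV cache and pass an explicit `cache_position`. A minimal sketch of that decoding pattern, not the app's code, assuming a recent transformers release (with `DynamicCache` and `cache_position` support) and a llama-style checkpoint; the repo id below is a placeholder:

# Minimal sketch of single-step decoding with an explicit KV cache.
# Assumes a recent transformers release and a llama-style model; "my-org/my-model" is a placeholder.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache

device = "cuda" if torch.cuda.is_available() else "cpu"
tok = AutoTokenizer.from_pretrained("my-org/my-model")
model = AutoModelForCausalLM.from_pretrained("my-org/my-model").to(device).eval()

ids = tok("Hello", return_tensors="pt").input_ids.to(device)
past = DynamicCache()                    # empty cache, filled during the prompt pass
offset = ids.shape[1]

with torch.no_grad():
    # Prompt pass: encode the whole prompt once and keep the cache.
    out = model(input_ids=ids, past_key_values=past, use_cache=True)
    last_tok = int(out.logits[:, -1, :].argmax(dim=-1))

    for _ in range(20):
        # Incremental pass: feed only the newest token together with the cache.
        step_ids = torch.tensor([[last_tok]], device=device)
        out = model(
            input_ids=step_ids,
            past_key_values=out.past_key_values,
            cache_position=torch.tensor([offset], device=device),  # position of this token
            use_cache=True,
        )
        last_tok = int(out.logits[:, -1, :].argmax(dim=-1))
        offset += 1
        if last_tok == tok.eos_token_id:
            break

Feeding only the newest token plus the cached keys and values keeps each step cheap instead of re-encoding the whole prefix on every iteration, which is the point of the refactor.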
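Note that the initial `generate` call still samples (`do_sample=True, temperature=0.7, top_p=0.95`) while the manual loop picks tokens with `torch.argmax`, i.e. greedy decoding, and applies the logits processor by hand. The documented `LogitsProcessor.__call__` contract is `(input_ids, scores) -> scores` with `scores` of shape `(batch_size, vocab_size)`, so whether the extra `unsqueeze(0)` in the committed loop is needed depends on how the app's `masker` is written. A sketch of manual processing with optional top-p sampling, using a hypothetical `BlockTokenMasker` as a stand-in for `masker`:

# Sketch of applying a logits processor by hand and choosing a token (batch size 1).
# BlockTokenMasker is a hypothetical stand-in for the app's `masker`.
import torch
from transformers import LogitsProcessor

class BlockTokenMasker(LogitsProcessor):
    def __init__(self, banned_ids):
        self.banned_ids = list(banned_ids)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        # scores has shape (batch_size, vocab_size); forbid the banned ids.
        scores[:, self.banned_ids] = float("-inf")
        return scores

def pick_next_token(masker, input_ids, logits, do_sample=True, temperature=0.7, top_p=0.95):
    """Apply `masker` to the last-position logits and pick one token id."""
    scores = masker(input_ids, logits[:, -1, :])      # (1, vocab_size)
    if not do_sample:
        return int(scores.argmax(dim=-1))             # greedy, as in the committed loop
    probs = torch.softmax(scores / temperature, dim=-1)
    sorted_probs, sorted_idx = probs.sort(dim=-1, descending=True)
    keep = sorted_probs.cumsum(dim=-1) <= top_p       # simple nucleus (top-p) filter
    keep[..., 0] = True                               # always keep the most likely token
    filtered = torch.where(keep, sorted_probs, torch.zeros_like(sorted_probs))
    choice = torch.multinomial(filtered / filtered.sum(dim=-1, keepdim=True), 1)
    return int(sorted_idx.gather(-1, choice))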
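The token handling after the warm-up call and inside the loop are nearly identical, so a follow-up could route both through one helper; a sketch using the app's existing `EOS_TOKEN`, `NEW_BLOCK`, `AUDIO_BASE`, `decode_block`, and `masker` names, assumed unchanged:

# Possible follow-up refactor: one helper for both token-handling sites.
# EOS_TOKEN, NEW_BLOCK, AUDIO_BASE, decode_block and masker are the app's own names.
async def emit_token(ws, buf, masker, tok):
    """Route one generated token: stop on EOS, reset on NEW_BLOCK, else buffer an audio code."""
    if tok == EOS_TOKEN:
        raise StopIteration
    if tok == NEW_BLOCK:
        buf.clear()
        return
    buf.append(tok - AUDIO_BASE)
    if len(buf) == 7:                          # one complete block of 7 audio codes
        await ws.send_bytes(decode_block(buf))
        buf.clear()
        masker.sent_blocks = 1                 # EOS becomes legal once audio has been sent

Called as the last statement of each step, `await emit_token(ws, buf, masker, last_tok)`, the early return on `NEW_BLOCK` has the same effect as the `continue` in the loop above.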