Update generate.py
generate.py  CHANGED  (+6 -10)
@@ -1,8 +1,7 @@
-# generate.py —
+# generate.py — Generates responses from EvoDecoderModel with Top-k sampling
 import torch
 from transformers import AutoTokenizer
 from evo_model import EvoDecoderModel
-from web_search import web_search  # 🔍 Import RAG utility
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
@@ -15,11 +14,7 @@ model.load_state_dict(torch.load("evo_decoder_model.pt", map_location=device))
 model.eval()
 
 def generate_response(prompt, max_length=100, top_k=40):
-
-    context = web_search(prompt)
-
-    # Step 2: Prepend context to the prompt
-    input_text = f"[CONTEXT]\n{context}\n\nUser: {prompt}\nAssistant:"
+    input_text = f"User: {prompt}\nAssistant:"
     input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
 
     for _ in range(max_length):
@@ -29,12 +24,13 @@ def generate_response(prompt, max_length=100, top_k=40):
 
         # Top-k sampling
        top_k_probs, top_k_indices = torch.topk(next_token_logits, top_k)
-        probs = torch.softmax(top_k_probs, dim=-1)
-
+        probs = torch.softmax(top_k_probs.squeeze(0), dim=-1)  # Flatten
+        sampled_index = torch.multinomial(probs, 1).item()
+        next_token = top_k_indices[0, sampled_index]
 
         input_ids = torch.cat([input_ids, next_token.unsqueeze(0)], dim=1)
 
-        # Stop if EOS token
+        # Stop if EOS token
         if next_token.item() == tokenizer.eos_token_id:
             break
 
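For reference, the added lines follow the standard top-k sampling recipe: keep the top_k highest logits, renormalize them with a softmax, draw one position with torch.multinomial, and map that position back to a vocabulary id. Below is a minimal, self-contained sketch of that single step; the sample_top_k helper and the toy logits are illustrative only, not part of generate.py, and a batch size of 1 is assumed.

import torch

def sample_top_k(next_token_logits: torch.Tensor, top_k: int = 40) -> int:
    """Sample one token id from the top_k highest-scoring logits.

    Assumes next_token_logits has shape (1, vocab_size).
    """
    top_k_logits, top_k_indices = torch.topk(next_token_logits, top_k)  # both (1, top_k)
    probs = torch.softmax(top_k_logits.squeeze(0), dim=-1)              # (top_k,), sums to 1
    sampled_index = torch.multinomial(probs, 1).item()                  # position within the top-k set
    return top_k_indices[0, sampled_index].item()                       # vocabulary id

# Toy usage with random logits over a 1000-token vocabulary
logits = torch.randn(1, 1000)
print(sample_top_k(logits, top_k=40))

Squeezing the top-k logits down to a 1-D probability vector before torch.multinomial keeps the sampled index a plain scalar, which makes the lookup back into top_k_indices straightforward.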