Spaces:

HemanM
/

EvoConvo

Sleeping

App Files Community

HemanM committed on Aug 4

Commit

ca6258b

verified ·

1 Parent(s): 5a8590e

Update generate.py

Browse files

Files changed (1) hide show

generate.py +12 -31

generate.py CHANGED Viewed

@@ -1,14 +1,12 @@
-# generate.py — Generates responses from EvoDecoder with optional web-based RAG
 import torch
 from transformers import AutoTokenizer
 from evo_model import EvoDecoderModel
-from search_utils import web_search  # Make sure this file exists with a working `web_search()` function
-# Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Load tokenizer and EvoDecoder model
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 vocab_size = tokenizer.vocab_size
@@ -16,47 +14,30 @@ model = EvoDecoderModel(vocab_size=vocab_size).to(device)
 model.load_state_dict(torch.load("evo_decoder_model.pt", map_location=device))
 model.eval()
-def generate_response(prompt, max_length=100, top_k=40, use_web=False):
-    """
-    Generates a response using EvoDecoder with optional web-enhanced context (RAG).
-    Args:
-        prompt (str): User input prompt.
-        max_length (int): Maximum number of tokens to generate.
-        top_k (int): Top-k sampling for diversity.
-        use_web (bool): Whether to augment prompt using live search.
-    Returns:
-        str: The generated assistant response.
-    """
-    # Add RAG-based context if enabled
     if use_web:
         web_context = web_search(prompt)
-        input_text = f"User: {prompt}\n\nContext: {web_context}\n\nAssistant:"
-    else:
-        input_text = f"User: {prompt}\nAssistant:"
-    # Tokenize input prompt
     input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
-    # Generate tokens autoregressively
     for _ in range(max_length):
         with torch.no_grad():
             logits = model(input_ids)
             next_token_logits = logits[:, -1, :]
             top_k_probs, top_k_indices = torch.topk(next_token_logits, top_k)
             probs = torch.softmax(top_k_probs, dim=-1)
-            next_token = top_k_indices[0, torch.multinomial(probs, 1)]
-        next_token = next_token.unsqueeze(0).unsqueeze(1)  # (1, 1)
         input_ids = torch.cat([input_ids, next_token], dim=1)
         if next_token.item() == tokenizer.eos_token_id:
             break
-    # Decode and return assistant's response only
     output = tokenizer.decode(input_ids[0], skip_special_tokens=True)
-    if "Assistant:" in output:
-        return output.split("Assistant:")[-1].strip()
-    return output.strip()

+# generate.py — EvoDecoder response generation with optional DuckDuckGo RAG
 import torch
 from transformers import AutoTokenizer
 from evo_model import EvoDecoderModel
+from search_utils import web_search
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 vocab_size = tokenizer.vocab_size
 model.load_state_dict(torch.load("evo_decoder_model.pt", map_location=device))
 model.eval()
+def generate_response(prompt, use_web=False, max_length=100, top_k=40):
+    # Augment with web context if enabled
+    context = ""
     if use_web:
         web_context = web_search(prompt)
+        context += f"Relevant Info: {web_context}\n"
+    input_text = context + f"User: {prompt}\nAssistant:"
     input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
     for _ in range(max_length):
         with torch.no_grad():
             logits = model(input_ids)
             next_token_logits = logits[:, -1, :]
+            # Top-k sampling
             top_k_probs, top_k_indices = torch.topk(next_token_logits, top_k)
             probs = torch.softmax(top_k_probs, dim=-1)
+            next_token = top_k_indices[0, torch.multinomial(probs, 1).item()].unsqueeze(0).unsqueeze(0)
         input_ids = torch.cat([input_ids, next_token], dim=1)
         if next_token.item() == tokenizer.eos_token_id:
             break
     output = tokenizer.decode(input_ids[0], skip_special_tokens=True)
+    return output.split("Assistant:")[-1].strip()