Update evo_plugin_example.py
evo_plugin_example.py  CHANGED  (+30 −14)
@@ -1,4 +1,4 @@
-# evo_plugin_example.py — FLAN-T5 stand-in (
+# evo_plugin_example.py — FLAN-T5 stand-in (truncation + clean kwargs)
 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
@@ -7,28 +7,44 @@ class _HFSeq2SeqGenerator:
         self.device = torch.device("cpu")
         self.tok = AutoTokenizer.from_pretrained(model_name)
         self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(self.device).eval()
+        # FLAN-T5 encoder max length
+        ml = getattr(self.tok, "model_max_length", 512) or 512
+        # Some tokenizers report a huge sentinel value; clamp to 512 for T5-small
+        self.max_src_len = min(512, int(ml if ml < 10000 else 512))
 
     @torch.no_grad()
     def generate(self, prompt: str, max_new_tokens: int = 200, temperature: float = 0.0) -> str:
-        …
+        # TRUNCATE input to model's max encoder length
+        inputs = self.tok(
+            prompt,
+            return_tensors="pt",
+            truncation=True,
+            max_length=self.max_src_len,
+        ).to(self.device)
 
-        …
-        …
-        …
-        …
-        …
-            min_length=inputs["input_ids"].shape[1] + int(min_new),
-            do_sample=temperature > 0.0,
-            temperature=float(max(0.01, temperature)),
-            top_p=0.9,
-            num_beams=4,
+        do_sample = float(temperature) > 0.0
+
+        gen_kwargs = dict(
+            max_new_tokens=int(max_new_tokens),
+            num_beams=4,  # stable, less echo
             early_stopping=True,
             no_repeat_ngram_size=3,
             repetition_penalty=1.1,
             length_penalty=0.1,
         )
-        …
-        …
+        # Only include sampling args when sampling is ON (silences warnings)
+        if do_sample:
+            gen_kwargs.update(
+                do_sample=True,
+                temperature=float(max(0.01, temperature)),
+                top_p=0.9,
+            )
+
+        # Encourage non-trivial length without tying to input length
+        gen_kwargs["min_new_tokens"] = max(48, int(0.4 * max_new_tokens))
+
+        out = self.model.generate(**inputs, **gen_kwargs)
+        return self.tok.decode(out[0], skip_special_tokens=True).strip()
 
 def load_model():
     return _HFSeq2SeqGenerator()
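The `ml < 10000` guard added in `__init__` exists because tokenizers without a configured encoder limit report a huge sentinel value (on the order of 1e30 in transformers) from `model_max_length`. A minimal sketch of that check in isolation, assuming the `t5-small` tokenizer as a stand-in for whatever FLAN-T5 checkpoint the plugin actually loads:

# clamp_check.py — standalone look at the model_max_length clamp
# (assumes "t5-small"; any T5-family tokenizer should behave the same)
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("t5-small")
ml = getattr(tok, "model_max_length", 512) or 512

# Tokenizers with no configured limit report a sentinel around 1e30;
# anything above the 10000 cutoff is treated as "unknown" and clamped.
max_src_len = min(512, int(ml if ml < 10000 else 512))
print(ml, "->", max_src_len)  # expected: 512 -> 512 for t5-small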
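As a quick smoke test of the new code paths, here is a minimal sketch of how the plugin contract might be exercised, assuming the file is importable as `evo_plugin_example` (the default `model_name` falls outside the shown hunks, so the checkpoint it downloads is whatever the constructor defaults to):

# smoke_test.py — hypothetical driver for the plugin (module name assumed)
from evo_plugin_example import load_model

gen = load_model()

# temperature == 0.0 takes the beam-search path: do_sample/temperature/top_p
# never enter gen_kwargs, so transformers emits no unused-argument warnings.
print(gen.generate("Summarize: FLAN-T5 is an instruction-tuned T5 model.",
                   max_new_tokens=96, temperature=0.0))

# temperature > 0.0 switches on sampling (do_sample=True, top_p=0.9).
print(gen.generate("Write one sentence about autumn.",
                   max_new_tokens=96, temperature=0.7))

Either call should produce at least `min_new_tokens` output tokens, here max(48, int(0.4 * 96)) = 48, without the old `min_length` coupling to the input length.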