HemanM committed
Commit 1861d4a · verified · 1 Parent(s): af358ab

Update evo_plugin_example.py

Files changed (1)
  1. evo_plugin_example.py  +13 -10
evo_plugin_example.py CHANGED
@@ -1,31 +1,34 @@
-# evo_plugin_example.py — small, instruction-following stand-in generator
-# The app will use YOUR evo_plugin.py if present; otherwise it falls back to this.
+# evo_plugin_example.py — FLAN-T5 stand-in (anti-echo tuned)
 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 class _HFSeq2SeqGenerator:
     def __init__(self, model_name: str = "google/flan-t5-small"):
-        # CPU is fine for demos; no GPU required on HF Spaces basic CPU.
         self.device = torch.device("cpu")
         self.tok = AutoTokenizer.from_pretrained(model_name)
         self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(self.device).eval()
 
     @torch.no_grad()
-    def generate(self, prompt: str, max_new_tokens: int = 200, temperature: float = 0.4) -> str:
+    def generate(self, prompt: str, max_new_tokens: int = 200, temperature: float = 0.0) -> str:
         inputs = self.tok(prompt, return_tensors="pt").to(self.device)
+
+        # Encourage non-trivial length and reduce repeats
+        min_new = max(48, int(0.4 * max_new_tokens))
         out = self.model.generate(
             **inputs,
-            max_new_tokens=int(max_new_tokens),
+            max_length=inputs["input_ids"].shape[1] + int(max_new_tokens),
+            min_length=inputs["input_ids"].shape[1] + int(min_new),
             do_sample=temperature > 0.0,
             temperature=float(max(0.01, temperature)),
             top_p=0.9,
-            num_beams=4,  # beam search makes it less echo-y
+            num_beams=4,
             early_stopping=True,
-            no_repeat_ngram_size=3,  # avoid repeating phrases
+            no_repeat_ngram_size=3,
+            repetition_penalty=1.1,
+            length_penalty=0.1,
         )
-        return self.tok.decode(out[0], skip_special_tokens=True).strip()
-
+        text = self.tok.decode(out[0], skip_special_tokens=True).strip()
+        return text
 
 def load_model():
-    # The app calls this to obtain a generator instance.
     return _HFSeq2SeqGenerator()
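
For context, a minimal host-side usage sketch follows. It assumes the fallback behavior described in the comment removed above (the app uses your evo_plugin.py if present, otherwise this example module); the evo_plugin import name and the prompt text are illustrative assumptions, not part of this commit.

try:
    from evo_plugin import load_model  # user-supplied generator, if present (assumed name)
except ImportError:
    from evo_plugin_example import load_model  # fall back to this example module

gen = load_model()

# With the new default temperature=0.0, do_sample is False, so decoding is
# deterministic 4-beam search; min_new works out to max(48, int(0.4 * 200)) = 80.
text = gen.generate(
    "Summarize: FLAN-T5 is an instruction-tuned encoder-decoder model.",
    max_new_tokens=200,
    temperature=0.0,
)
print(text)

Note that the new min_length/max_length bounds are computed as offsets from the prompt's token count, so longer prompts raise both bounds accordingly.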