# evo_plugin_example.py — small, instruction-following stand-in generator
# The app will use YOUR evo_plugin.py if present; otherwise it falls back to this.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


class _HFSeq2SeqGenerator:
    def __init__(self, model_name: str = "google/flan-t5-small"):
        # CPU is fine for demos; no GPU required on HF Spaces basic CPU.
        self.device = torch.device("cpu")
        self.tok = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(self.device).eval()
    def generate(self, prompt: str, max_new_tokens: int = 200, temperature: float = 0.4) -> str:
        # Truncate overly long prompts to the model's max input length.
        inputs = self.tok(prompt, return_tensors="pt", truncation=True).to(self.device)
        do_sample = temperature > 0.0
        gen_kwargs = dict(
            max_new_tokens=int(max_new_tokens),
            do_sample=do_sample,
            num_beams=4,              # beam search makes it less echo-y
            early_stopping=True,
            no_repeat_ngram_size=3,   # avoid repeating phrases
        )
        # Only pass sampling knobs when sampling; otherwise transformers
        # warns that temperature/top_p are ignored under greedy/beam search.
        if do_sample:
            gen_kwargs["temperature"] = float(max(0.01, temperature))
            gen_kwargs["top_p"] = 0.9
        out = self.model.generate(**inputs, **gen_kwargs)
        return self.tok.decode(out[0], skip_special_tokens=True).strip()


def load_model():
    # The app calls this to obtain a generator instance.
    return _HFSeq2SeqGenerator()
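

# --- Usage sketch (illustrative, not part of the plugin contract above) ----
# A minimal sketch of how the host app is assumed to resolve the plugin:
# try to import a user-supplied evo_plugin module and use its load_model();
# otherwise fall back to this example's load_model(). The module name
# "evo_plugin" comes from the comment at the top of this file; the prompt
# text below is made up for demonstration.
if __name__ == "__main__":
    try:
        from evo_plugin import load_model  # user-provided plugin, if present
    except ImportError:
        pass  # no user plugin found; keep this file's load_model()
    gen = load_model()
    print(gen.generate("Summarize: The quick brown fox jumps over the lazy dog.",
                       max_new_tokens=48, temperature=0.0))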