# evo_plugin_example.py — small, instruction-following stand-in generator
# The app will use YOUR evo_plugin.py if present; otherwise it falls back to this.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

class _HFSeq2SeqGenerator:
    def __init__(self, model_name: str = "google/flan-t5-small"):
        # CPU is fine for demos; no GPU is needed (e.g., the basic CPU tier on HF Spaces).
        self.device = torch.device("cpu")
        self.tok = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(self.device).eval()

    @torch.no_grad()
    def generate(self, prompt: str, max_new_tokens: int = 200, temperature: float = 0.4) -> str:
        # Truncate overly long prompts to the tokenizer's max input length.
        inputs = self.tok(prompt, return_tensors="pt", truncation=True).to(self.device)
        gen_kwargs = dict(
            max_new_tokens=int(max_new_tokens),
            do_sample=temperature > 0.0,
            num_beams=4,                 # beam search makes it less echo-y
            early_stopping=True,
            no_repeat_ngram_size=3,      # avoid repeating phrases
        )
        if temperature > 0.0:
            # Sampling knobs apply only when sampling; passing them with
            # do_sample=False makes recent transformers versions warn.
            gen_kwargs["temperature"] = float(temperature)
            gen_kwargs["top_p"] = 0.9
        out = self.model.generate(**inputs, **gen_kwargs)
        return self.tok.decode(out[0], skip_special_tokens=True).strip()


def load_model():
    # The app calls this to obtain a generator instance.
    return _HFSeq2SeqGenerator()
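

# Usage sketch (illustrative only). Per the header comment, a host app would
# prefer a user-supplied evo_plugin.py and fall back to this module; the
# import below assumes those module names, but the app's actual loader may differ:
#
#     try:
#         from evo_plugin import load_model          # your plugin, if present
#     except ImportError:
#         from evo_plugin_example import load_model  # this stand-in
#
if __name__ == "__main__":
    # Quick local smoke test; downloads google/flan-t5-small on first run.
    gen = load_model()
    print(gen.generate("Translate to German: The weather is nice today.",
                       max_new_tokens=40, temperature=0.0))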