# EvoPlatform / inference.py
import torch
from evo_model import EvoTransformer

# Load EvoTransformer model
def load_model(model_path="evo_hellaswag.pt", device=None):
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    model = EvoTransformer()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    return model, device

# Predict the best option (0 or 1)
def predict(model, tokenizer, prompt, option1, option2, device):
    inputs = [
        f"{prompt} {option1}",
        f"{prompt} {option2}",
    ]
    encoded = tokenizer(inputs, padding=True, truncation=True, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(encoded["input_ids"])  # already includes classifier
    logits = outputs.squeeze(-1)  # shape: [2]
    probs = torch.softmax(logits, dim=0)
    best = torch.argmax(probs).item()
    return {
        "choice": best,
        "confidence": probs[best].item(),
        "scores": probs.tolist(),
    }
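
# Example usage (illustrative sketch, not part of the original module): the
# tokenizer is an assumption — any Hugging Face tokenizer whose vocabulary
# matches the one EvoTransformer was trained with would work. The
# "bert-base-uncased" checkpoint and the prompt/options below are hypothetical
# placeholders.
if __name__ == "__main__":
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed tokenizer
    model, device = load_model()
    result = predict(
        model,
        tokenizer,
        prompt="She picked up the guitar and",
        option1="began to play a quiet melody.",
        option2="ate it in one bite.",
        device=device,
    )
    print(result)  # e.g. {"choice": 0, "confidence": 0.87, "scores": [0.87, 0.13]}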