Spaces:
Sleeping
Sleeping
import os | |
import torch | |
import torch.nn.functional as F | |
from transformers import AutoTokenizer | |
from evo_model import EvoTransformerV22 | |
from search_utils import web_search | |
import openai | |
import time | |
# 🔐 Load OpenAI API Key securely (taken from the environment, never hard-coded)
openai.api_key = os.environ.get("OPENAI_API_KEY")

# 🔁 Track model changes
# MODEL_PATH is the checkpoint file; `model` caches the loaded instance and
# `last_mod_time` remembers the checkpoint's mtime at the last load so that
# load_model() can tell when the file has been replaced.
MODEL_PATH = "evo_hellaswag.pt"
model, last_mod_time = None, 0

# Tokenizer shared by every scoring call.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# 📦 Load model with auto-reload if file is updated
def load_model():
    """Return the Evo model, (re)loading it when the checkpoint file changes.

    The checkpoint at ``MODEL_PATH`` is loaded on the first call and again
    whenever its modification time differs from the one recorded at the
    previous load. The loaded instance is cached in the module-level
    ``model`` variable.

    Returns:
        The cached ``EvoTransformerV22`` instance, switched to eval mode.

    Raises:
        OSError: If ``MODEL_PATH`` cannot be stat'ed (e.g. it does not exist).
    """
    global model, last_mod_time
    current_mod_time = os.path.getmtime(MODEL_PATH)
    # Compare with != rather than >: a checkpoint restored from a backup or
    # copied with its timestamp preserved can carry an *older* mtime and
    # must still trigger a reload.
    if model is None or current_mod_time != last_mod_time:
        # Build into a local first so that a failed load never replaces the
        # previously cached model with a half-initialised one.
        fresh_model = EvoTransformerV22()
        # NOTE(security): torch.load unpickles the file, so MODEL_PATH must
        # only ever point at a trusted checkpoint.
        fresh_model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))
        fresh_model.eval()
        model = fresh_model
        last_mod_time = current_mod_time
        print("🔁 Evo model reloaded.")
    return model
# 🧠 Evo decision logic with confidence scores
def get_evo_response(query, options, user_context=""):
    """Score every candidate answer with the Evo model and pick the best one.

    Each option is combined with the query and the retrieved context into a
    single input string, tokenized (truncated/padded to 128 tokens) and
    passed through the model; the sigmoid of the model output is that
    option's score.

    Args:
        query: The question text.
        options: Non-empty sequence of candidate answer strings.
        user_context: Optional extra context, appended after the results
            returned by ``web_search(query)``.

    Returns:
        A 4-tuple ``(answer, confidence, reasoning, context)``: the
        highest-scoring option (the earliest one on a tie), its score, a
        ``"opt: score vs opt: score"`` trace covering all options, and the
        newline-joined context string that was fed to the model.

    Raises:
        ValueError: If ``options`` is empty.
    """
    # Fail early, before the web search and model load, instead of hitting
    # an IndexError after all the expensive work has been done.
    if not options:
        raise ValueError("options must contain at least one candidate answer")

    model = load_model()

    # Retrieve RAG context + optional user input
    context_texts = web_search(query) + ([user_context] if user_context else [])
    context_str = "\n".join(context_texts)
    input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]

    # Encode every option and compute its score
    scores = []
    with torch.no_grad():
        for pair in input_pairs:
            encoded = tokenizer(
                pair,
                return_tensors="pt",
                padding="max_length",
                truncation=True,
                max_length=128,
            )
            logits = model(encoded["input_ids"])
            scores.append(torch.sigmoid(logits).item())

    # Arg-max over all options; max() keeps the first maximum, so a tie
    # still resolves to the earlier option as the two-option version did.
    best_idx = max(range(len(scores)), key=scores.__getitem__)
    reasoning = " vs ".join(
        f"{opt}: {score:.3f}" for opt, score in zip(options, scores)
    )
    return (
        options[best_idx],  # 🔹 Selected answer
        scores[best_idx],   # 🔹 Confidence score
        reasoning,          # 🔹 Reasoning trace
        context_str,        # 🔹 Context used
    )
# 🤖 GPT-3.5 backup or comparison
def get_gpt_response(query, user_context=""):
    """Ask ``gpt-3.5-turbo`` the query, optionally with extra context.

    Args:
        query: The user question.
        user_context: Optional text appended to the prompt under a
            ``Context:`` heading.

    Returns:
        The model reply with surrounding whitespace stripped (an empty
        string if the reply has no text). Any exception raised while
        building or sending the request is caught and returned as a
        ``"⚠️ GPT error:"`` message instead of propagating.
    """
    try:
        context_block = f"\n\nContext:\n{user_context}" if user_context else ""
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": query + context_block}],
            temperature=0.7,
        )
        # The SDK types message.content as optional; treat a missing reply
        # as empty text rather than failing on None.strip().
        content = response.choices[0].message.content
        return (content or "").strip()
    except Exception as e:
        # Deliberate best-effort boundary: the caller gets the failure as text.
        return f"⚠️ GPT error:\n\n{e}"
# ✅ Final callable interface
def infer(query, options, user_context=""):
    """Public entry point: delegate straight to ``get_evo_response``.

    Args:
        query: The question text.
        options: Candidate answer strings to choose between.
        user_context: Optional extra context passed through unchanged.

    Returns:
        Whatever ``get_evo_response`` returns for the same arguments.
    """
    return get_evo_response(
        query=query,
        options=options,
        user_context=user_context,
    )
# 🧠 Unified chat-style interface for EvoRAG
def evo_chat_predict(history, query, options):
    """Answer ``query`` using recent chat history as additional context.

    Args:
        history: Earlier chat messages; only the last six entries are used.
            May be empty or ``None``.
        query: The question text.
        options: Candidate answer strings to choose between.

    Returns:
        A dict with the keys ``"answer"``, ``"confidence"`` (rounded to
        three decimals), ``"reasoning"`` and ``"context_used"``.
    """
    # Keep only the six most recent entries (up to three exchanges).
    recent_turns = history[-6:] if history else []
    context = "\n".join(recent_turns)

    answer, score, reasoning, context_used = get_evo_response(query, options, context)

    return dict(
        answer=answer,
        confidence=round(score, 3),
        reasoning=reasoning,
        context_used=context_used,
    )