import os
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer
from evo_model import EvoTransformerV22
from search_utils import web_search
import openai
import time

# Load the OpenAI API key securely from the environment
openai.api_key = os.getenv("OPENAI_API_KEY")

# Track model checkpoint changes for auto-reload
MODEL_PATH = "evo_hellaswag.pt"
last_mod_time = 0
model = None
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Load model with auto-reload if the checkpoint file has been updated
def load_model():
    global model, last_mod_time
    current_mod_time = os.path.getmtime(MODEL_PATH)
    if model is None or current_mod_time > last_mod_time:
        model = EvoTransformerV22()
        model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))
        model.eval()
        last_mod_time = current_mod_time
        print("Evo model reloaded.")
    return model
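
# Note (illustrative): repeated calls are cheap; the checkpoint is only
# re-read when evo_hellaswag.pt gets a newer mtime, e.g. after retraining:
#   m1 = load_model()   # first call loads weights from disk
#   m2 = load_model()   # cached instance returned, no reload
#   assert m1 is m2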

# Evo decision logic with confidence scores
def get_evo_response(query, options, user_context=""):
    model = load_model()
    # Retrieve RAG context from web search, plus optional user-supplied context
    context_texts = web_search(query) + ([user_context] if user_context else [])
    context_str = "\n".join(context_texts)
    input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]
    # Encode each option and compute its score
    scores = []
    for pair in input_pairs:
        encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
        with torch.no_grad():
            logits = model(encoded["input_ids"])
        score = torch.sigmoid(logits).item()
        scores.append(score)
    # Selection assumes exactly two options
    best_idx = int(scores[1] > scores[0])
    return (
        options[best_idx],  # selected answer
        max(scores),        # confidence score
        f"{options[0]}: {scores[0]:.3f} vs {options[1]}: {scores[1]:.3f}",  # reasoning trace
        context_str,        # context used
    )
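
# Example call (illustrative only; the query and options are made up, and
# web_search must be reachable for the RAG context to populate):
#   answer, confidence, trace, ctx = get_evo_response(
#       "You smell gas in the kitchen. What should you do first?",
#       ["Ventilate and leave the area", "Light a match to find the leak"],
#   )
#   print(answer, confidence, trace)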

# GPT-3.5 backup or comparison path
def get_gpt_response(query, user_context=""):
    try:
        context_block = f"\n\nContext:\n{user_context}" if user_context else ""
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": query + context_block}],
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"GPT error:\n\n{e}"

# Final callable interface
def infer(query, options, user_context=""):
    return get_evo_response(query, options, user_context)

# Unified chat-style interface for EvoRAG
def evo_chat_predict(history, query, options):
    # Use the last few exchanges as context (up to 3 user/assistant pairs)
    context = "\n".join(history[-6:]) if history else ""
    evo_ans, evo_score, evo_reason, evo_ctx = get_evo_response(query, options, context)
    return {
        "answer": evo_ans,
        "confidence": round(evo_score, 3),
        "reasoning": evo_reason,
        "context_used": evo_ctx,
    }
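
# Minimal smoke test (illustrative: the history, query, and options below are
# invented; running it needs evo_hellaswag.pt on disk, network access for
# web_search, and OPENAI_API_KEY set if the GPT path is exercised).
if __name__ == "__main__":
    demo_history = [
        "Q: Is it safe to use an elevator during a fire?",
        "A: No, elevators can fail or open onto fire floors.",
    ]
    result = evo_chat_predict(
        demo_history,
        "A fire alarm sounds on your floor. What should you do?",
        ["Take the stairs to the nearest exit", "Wait for the elevator"],
    )
    print(result)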