Spaces:
Sleeping
Sleeping
import os | |
import torch | |
import torch.nn.functional as F | |
from transformers import AutoTokenizer | |
from evo_model import EvoTransformerV22 | |
from search_utils import web_search | |
import openai | |
import time | |
import psutil | |
import platform | |
# The OpenAI key is read from the environment; it stays None when unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Checkpoint file with the Evo weights; load_model() watches its mtime.
MODEL_PATH = "evo_hellaswag.pt"

# Tokenizer shared by inference and feedback retraining.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Lazily filled model cache and the checkpoint mtime it was loaded from.
model, last_mod_time = None, 0
# Load Evo model with auto-reload
def load_model():
    """Return the cached Evo model, (re)loading it when the checkpoint changes.

    The checkpoint at ``MODEL_PATH`` is loaded on the first call and again
    whenever its modification time is newer than the one recorded at the
    previous load. Weights are mapped to CPU and the model is switched to
    eval mode.

    Returns:
        The cached model, or ``None`` when checking or loading the checkpoint
        raised; in that case the error is printed and the cache is cleared.
    """
    global model, last_mod_time
    try:
        checkpoint_mtime = os.path.getmtime(MODEL_PATH)
        # Cache is still fresh: hand back what we already have.
        if model is not None and checkpoint_mtime <= last_mod_time:
            return model

        model = EvoTransformerV22()
        state_dict = torch.load(MODEL_PATH, map_location="cpu")
        model.load_state_dict(state_dict)
        model.eval()
        last_mod_time = checkpoint_mtime
        print("โ Evo model loaded.")
    except Exception as exc:
        print(f"โ Error loading Evo model: {exc}")
        model = None
    return model
# Evo inference core logic
def evo_infer(query, options, user_context=""):
    """Score every candidate option with the Evo model and return the best one.

    Each option is scored independently: the text
    ``"{query} [SEP] {option} [CTX] {context}"`` is tokenized (padded and
    truncated to 128 tokens), passed to the model, and the sigmoid of the
    output is taken as that option's score. The model is expected to emit a
    single value per input, because ``.item()`` is used to read it.

    Context selection: queries that contain a digit, an arithmetic symbol or
    one of the phrases "what is", "solve", "calculate" use only
    ``user_context``; every other query also gets ``web_search(query)``
    results prepended.

    Args:
        query: The question text.
        options: Candidate answers. Any number is accepted; ties are resolved
            in favour of the earliest option.
        user_context: Optional extra context appended after search results.

    Returns:
        A 4-tuple ``(answer, confidence, reasoning, context)``. ``confidence``
        is the score of the returned answer and ``reasoning`` lists all
        option scores joined by " vs ". When the model cannot be loaded the
        tuple is ``("Model Error", 0.0, "Model not available", "")``; when no
        options are given it is ``("No Options", 0.0, "No options provided", "")``.
    """
    model = load_model()
    if model is None:
        return "Model Error", 0.0, "Model not available", ""

    options = list(options) if options is not None else []
    if not options:
        # Nothing to rank; bail out before spending a web search on it.
        return "No Options", 0.0, "No options provided", ""

    def is_fact_or_math(q):
        markers = ("+", "-", "*", "/", "=", "what is", "solve", "calculate")
        q_lower = q.lower()
        return any(ch.isdigit() for ch in q_lower) or any(m in q_lower for m in markers)

    if is_fact_or_math(query):
        context_str = user_context or ""
    else:
        # Tolerate a search helper that yields None or a bare string.
        search_results = web_search(query) or []
        if isinstance(search_results, str):
            context_parts = [search_results]
        else:
            context_parts = list(search_results)
        if user_context:
            context_parts.append(user_context)
        context_str = "\n".join(context_parts)

    scores = []
    with torch.no_grad():
        for opt in options:
            pair = f"{query} [SEP] {opt} [CTX] {context_str}"
            encoded = tokenizer(
                pair,
                return_tensors="pt",
                padding="max_length",
                truncation=True,
                max_length=128,
            )
            logits = model(encoded["input_ids"])
            scores.append(torch.sigmoid(logits).item())

    # Arg-max over all options so the reported confidence always belongs to
    # the returned answer; max() keeps the first index on ties.
    best_idx = max(range(len(scores)), key=scores.__getitem__)
    reasoning = " vs ".join(
        f"{opt}: {score:.3f}" for opt, score in zip(options, scores)
    )
    return options[best_idx], scores[best_idx], reasoning, context_str
# GPT fallback (for comparison)
def get_gpt_response(query, user_context=""):
    """Ask ``gpt-3.5-turbo`` the question and return its reply text.

    Args:
        query: The user's question.
        user_context: Optional text appended to the prompt under a
            "Context:" heading.

    Returns:
        The stripped reply, or a "GPT error" message containing the exception
        text if building the prompt or calling the API raised.
    """
    try:
        suffix = ""
        if user_context:
            suffix = f"\n\nContext:\n{user_context}"
        user_message = {"role": "user", "content": query + suffix}
        completion = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[user_message],
            temperature=0.7,
        )
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as err:
        return f"โ ๏ธ GPT error:\n{err!s}"
# Live Evo prediction logic
def _turn_to_text(turn):
    """Render one history entry as a single string."""
    if isinstance(turn, str):
        return turn
    if isinstance(turn, (list, tuple)):
        # e.g. a (user, bot) pair or one table row; skip empty slots.
        return " ".join(str(part) for part in turn if part is not None)
    return str(turn)


def _history_to_context(history, max_turns=6):
    """Join the last ``max_turns`` history entries into newline-separated text."""
    if isinstance(history, (list, tuple)):
        recent = history[-max_turns:]
    elif hasattr(history, "empty") and not history.empty:
        # pandas-like object: a Series offers tolist(); a DataFrame does not,
        # so fall back to its row values.
        tail = history.tail(max_turns).astype(str)
        recent = tail.tolist() if hasattr(tail, "tolist") else tail.values.tolist()
    else:
        return ""
    return "\n".join(_turn_to_text(turn) for turn in recent)


def evo_chat_predict(history, query, options):
    """Run Evo inference using the most recent chat history as context.

    Args:
        history: Previous turns. A list/tuple (entries may be strings, pairs
            or other objects) or a pandas-like object exposing ``empty`` and
            ``tail``; anything else yields an empty context.
        query: The question to answer.
        options: Candidate answers forwarded to ``evo_infer``.

    Returns:
        A dict with keys ``answer``, ``confidence`` (rounded to 3 decimals),
        ``reasoning`` and ``context_used``.
    """
    try:
        context = _history_to_context(history)
    except Exception:
        # History is optional enrichment: never let a malformed log block
        # the prediction itself.
        context = ""

    evo_ans, evo_score, evo_reason, evo_ctx = evo_infer(query, options, context)
    return {
        "answer": evo_ans,
        "confidence": round(evo_score, 3),
        "reasoning": evo_reason,
        "context_used": evo_ctx,
    }
# Evo model config metadata
def get_model_config():
    """Return hard-coded metadata describing the Evo model.

    The values are constants in this function; they are not read from the
    loaded checkpoint.
    """
    config = dict(
        num_layers=6,
        num_heads=8,
        ffn_dim=1024,
        memory_enabled=True,
        phase="v2.2",
        accuracy="~64.5%",
    )
    return config
# Runtime stats
def get_system_stats():
    """Collect a snapshot of CPU, RAM and (if present) GPU usage.

    Returns:
        A dict with the device kind, CPU usage percent, used/total system
        memory in GiB, GPU name and total/allocated GPU memory in GiB
        ("N/A" for the GPU fields when CUDA is unavailable), and the platform
        string. GPU figures refer to CUDA device 0.
    """
    bytes_per_gb = 1024 ** 3
    has_cuda = torch.cuda.is_available()
    gpu_props = torch.cuda.get_device_properties(0) if has_cuda else None
    vm = psutil.virtual_memory()

    stats = {
        "device": "GPU" if has_cuda else "CPU",
        "cpu_usage_percent": psutil.cpu_percent(),
        "memory_used_gb": round(vm.used / bytes_per_gb, 2),
        "memory_total_gb": round(vm.total / bytes_per_gb, 2),
    }

    if gpu_props:
        stats["gpu_name"] = gpu_props.name
        stats["gpu_memory_total_gb"] = round(gpu_props.total_memory / bytes_per_gb, 2)
        # memory_allocated() counts memory held by tensors of this process.
        stats["gpu_memory_used_gb"] = round(torch.cuda.memory_allocated() / bytes_per_gb, 2)
    else:
        stats["gpu_name"] = "N/A"
        stats["gpu_memory_total_gb"] = "N/A"
        stats["gpu_memory_used_gb"] = "N/A"

    stats["platform"] = platform.platform()
    return stats
# Retrain from in-memory feedback_log
def retrain_from_feedback(feedback_log):
    """Fine-tune the Evo model on user feedback and save the checkpoint.

    Each feedback row starts with ``(question, opt1, opt2, answer, ...)``.
    The option equal to ``answer`` (after stripping whitespace) is trained
    toward a target of 1 and the other option toward 0, matching how
    ``evo_infer`` scores each option independently and picks the highest.
    Rows whose answer matches neither option carry no signal and are skipped.

    One Adam step (lr=1e-4) is taken per training example. Afterwards the
    model is put back in eval mode and its weights are written to
    ``MODEL_PATH``; the changed file mtime makes the next ``load_model()``
    call reload it.

    Args:
        feedback_log: Iterable of feedback rows as described above.

    Returns:
        A status message: a warning when ``feedback_log`` is empty, an error
        when the model cannot be loaded, otherwise a success message.
    """
    if not feedback_log:
        return "โ ๏ธ No feedback data to retrain from."

    model = load_model()
    if model is None:
        return "โ Evo model not available."

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    model.train()
    try:
        for row in feedback_log:
            question, opt1, opt2, answer, *_ = row
            chosen = str(answer).strip()
            # opt2 is checked first, as in the original labelling rule.
            if chosen == str(opt2).strip():
                positive, negative = opt2, opt1
            elif chosen == str(opt1).strip():
                positive, negative = opt1, opt2
            else:
                continue

            examples = [(positive, 1.0)]
            if str(negative).strip() != chosen:
                # Identical options would give contradictory targets.
                examples.append((negative, 0.0))

            for option, target in examples:
                encoded = tokenizer(
                    f"{question} [SEP] {option}",
                    return_tensors="pt",
                    padding="max_length",
                    truncation=True,
                    max_length=128,
                )
                # Flatten the logits and build the label with the same shape:
                # BCE-with-logits rejects mismatched input/target sizes.
                logits = model(encoded["input_ids"]).reshape(-1)
                label = torch.full_like(logits, target)
                loss = F.binary_cross_entropy_with_logits(logits, label)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
    finally:
        # The same object is the inference cache; never leave it in train
        # mode, even if a malformed row aborts the loop.
        model.eval()

    torch.save(model.state_dict(), MODEL_PATH)
    return "โ Evo retrained and reloaded from memory."