"""Inference, GPT comparison, runtime stats and feedback retraining for Evo."""

import os
import platform
import time

import openai
import psutil
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer

from evo_model import EvoTransformerV22
from search_utils import web_search

openai.api_key = os.getenv("OPENAI_API_KEY")

MODEL_PATH = "evo_hellaswag.pt"
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Cached model instance and the checkpoint mtime it was loaded from.
model = None
last_mod_time = 0

# Substrings that make a query count as "fact or math" (no web search).
_FACT_OR_MATH_MARKERS = ("+", "-", "*", "/", "=", "what is", "solve", "calculate")


# 🔁 Load Evo model with auto-reload
def load_model():
    """Return the cached Evo model, (re)loading it when the checkpoint changed.

    The checkpoint at ``MODEL_PATH`` is loaded when no model is cached yet or
    when the file's modification time is newer than the one recorded at the
    previous load. The loaded model is put in eval mode.

    Returns:
        The loaded model, or ``None`` if anything went wrong (the error is
        printed and the cached model is cleared so the next call retries).
    """
    global model, last_mod_time
    try:
        current_mod_time = os.path.getmtime(MODEL_PATH)
        if model is None or current_mod_time > last_mod_time:
            # NOTE(review): torch.load unpickles the file; only point
            # MODEL_PATH at trusted checkpoints (consider weights_only=True
            # if the installed torch version supports it).
            model = EvoTransformerV22()
            model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))
            model.eval()
            last_mod_time = current_mod_time
            print("✅ Evo model loaded.")
    except Exception as e:
        print(f"❌ Error loading Evo model: {e}")
        model = None
    return model


def _is_fact_or_math(query):
    """Return True if the query contains a digit or one of the math/fact markers."""
    q_lower = query.lower()
    if any(char.isdigit() for char in q_lower):
        return True
    return any(marker in q_lower for marker in _FACT_OR_MATH_MARKERS)


# 🔮 Evo inference core logic
def evo_infer(query, options, user_context=""):
    """Score every option for ``query`` and return the highest-scoring one.

    Queries that look like fact/math questions use only ``user_context``;
    all other queries additionally prepend the results of ``web_search``.

    Args:
        query: The question text.
        options: Candidate answers. Any number of options is accepted; on a
            tie the earliest option wins.
        user_context: Optional extra context appended to the model input.

    Returns:
        A 4-tuple ``(best_option, best_score, reasoning, context_str)`` where
        ``reasoning`` lists every option with its score joined by ``" vs "``.
        If the model cannot be loaded the tuple is
        ``("Model Error", 0.0, "Model not available", "")``; if ``options`` is
        empty it is ``("Input Error", 0.0, "No options provided", "")``.
    """
    options = list(options) if options is not None else []
    if not options:
        # Previously this surfaced as an IndexError after the web search ran.
        return "Input Error", 0.0, "No options provided", ""

    model = load_model()
    if model is None:
        return "Model Error", 0.0, "Model not available", ""

    if _is_fact_or_math(query):
        context_str = user_context or ""
    else:
        search_results = web_search(query)
        context_str = "\n".join(search_results + ([user_context] if user_context else []))

    scores = []
    with torch.no_grad():
        for opt in options:
            pair = f"{query} [SEP] {opt} [CTX] {context_str}"
            encoded = tokenizer(
                pair,
                return_tensors="pt",
                padding="max_length",
                truncation=True,
                max_length=128,
            )
            logits = model(encoded["input_ids"])
            scores.append(torch.sigmoid(logits).item())

    # max() returns the first maximal index, so with two options a tie still
    # resolves to options[0], exactly like the old `scores[1] > scores[0]`.
    best_idx = max(range(len(scores)), key=scores.__getitem__)
    reasoning = " vs ".join(
        f"{opt}: {score:.3f}" for opt, score in zip(options, scores)
    )
    return options[best_idx], scores[best_idx], reasoning, context_str


# 🤖 GPT fallback (for comparison)
def get_gpt_response(query, user_context=""):
    """Ask ``gpt-3.5-turbo`` the same query and return its stripped reply.

    Args:
        query: The question text.
        user_context: Optional context, appended under a ``Context:`` header.

    Returns:
        The reply text, or a ``"⚠️ GPT error:"`` message containing the
        exception text if the request (or reading the reply) fails.
    """
    try:
        context_block = f"\n\nContext:\n{user_context}" if user_context else ""
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": query + context_block}],
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"⚠️ GPT error:\n{str(e)}"


# 🧠 Live Evo prediction logic
def evo_chat_predict(history, query, options):
    """Run ``evo_infer`` using the last six history entries as context.

    Args:
        history: Either a list (its last six items are used) or an object
            exposing ``empty``/``tail`` (presumably a pandas object — verify
            against callers). Anything else yields an empty context.
        query: The question text.
        options: Candidate answers forwarded to ``evo_infer``.

    Returns:
        A dict with keys ``answer``, ``confidence`` (score rounded to three
        decimals), ``reasoning`` and ``context_used``.
    """
    try:
        if isinstance(history, list):
            # Cast to str so non-string items do not silently empty the
            # context via the except below (mirrors astype(str) in the
            # other branch).
            context = "\n".join(str(item) for item in history[-6:])
        elif hasattr(history, "empty") and not history.empty:
            context = "\n".join(history.tail(6).astype(str).tolist())
        else:
            context = ""
    except Exception:
        # Best effort: context is optional, so fall back to none on any error.
        context = ""

    evo_ans, evo_score, evo_reason, evo_ctx = evo_infer(query, options, context)
    return {
        "answer": evo_ans,
        "confidence": round(evo_score, 3),
        "reasoning": evo_reason,
        "context_used": evo_ctx,
    }


# 📊 Evo model config metadata
def get_model_config():
    """Return static, hard-coded metadata describing the Evo model.

    The values are literals in this function; they are not read from the
    loaded checkpoint.
    """
    return {
        "num_layers": 6,
        "num_heads": 8,
        "ffn_dim": 1024,
        "memory_enabled": True,
        "phase": "v2.2",
        "accuracy": "~64.5%",
    }


# 🖥️ Runtime stats
def get_system_stats():
    """Return a snapshot of CPU, RAM, GPU and platform information.

    GPU fields are the string ``"N/A"`` when CUDA is not available; memory
    figures are in GiB rounded to two decimals.
    """
    has_cuda = torch.cuda.is_available()
    gpu_info = torch.cuda.get_device_properties(0) if has_cuda else None
    memory = psutil.virtual_memory()
    gib = 1024 ** 3

    return {
        "device": "GPU" if has_cuda else "CPU",
        "cpu_usage_percent": psutil.cpu_percent(),
        "memory_used_gb": round(memory.used / gib, 2),
        "memory_total_gb": round(memory.total / gib, 2),
        "gpu_name": gpu_info.name if gpu_info else "N/A",
        "gpu_memory_total_gb": round(gpu_info.total_memory / gib, 2) if gpu_info else "N/A",
        "gpu_memory_used_gb": round(torch.cuda.memory_allocated() / gib, 2) if gpu_info else "N/A",
        "platform": platform.platform(),
    }


# 🔁 Retrain from in-memory feedback_log
def retrain_from_feedback(feedback_log):
    """Fine-tune the Evo model on user feedback and save the checkpoint.

    Each row must start with ``(question, opt1, opt2, answer, ...)``. The
    option equal to ``answer`` (after stripping whitespace) is trained toward
    a score of 1 and the other option toward 0, which matches how
    ``evo_infer`` ranks options (higher score wins). Rows whose answer matches
    neither option carry no usable signal and are skipped.

    Args:
        feedback_log: Iterable of feedback rows as described above.

    Returns:
        A status message. The checkpoint at ``MODEL_PATH`` is only rewritten
        when at least one example was trained; ``load_model`` then picks up
        the new file through its modification-time check.
    """
    if not feedback_log:
        return "⚠️ No feedback data to retrain from."

    model = load_model()
    if model is None:
        return "❌ Evo model not available."

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    trained_examples = 0

    model.train()
    try:
        for row in feedback_log:
            question, opt1, opt2, answer, *_ = row
            chosen = answer.strip()
            targets = [
                (option, 1.0 if option.strip() == chosen else 0.0)
                for option in (opt1, opt2)
            ]
            if not any(target for _, target in targets):
                continue

            for option, target in targets:
                encoded = tokenizer(
                    f"{question} [SEP] {option}",
                    return_tensors="pt",
                    padding="max_length",
                    truncation=True,
                    max_length=128,
                )
                # Flatten the logits and build the label from them so both
                # always share a shape; the old squeeze() produced a 0-d
                # input against a (1,) target, which BCE-with-logits rejects.
                logits = model(encoded["input_ids"]).reshape(-1)
                label = torch.full_like(logits, target)
                loss = F.binary_cross_entropy_with_logits(logits, label)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                trained_examples += 1
    finally:
        # Never leave the shared model in training mode for later inference,
        # even if a row fails part-way through.
        model.eval()

    if trained_examples == 0:
        return "⚠️ No feedback data to retrain from."

    torch.save(model.state_dict(), MODEL_PATH)
    return "✅ Evo retrained and reloaded from memory."