Update inference.py
inference.py (CHANGED: +100, -145)
@@ -1,176 +1,131 @@
Old version (removed lines are prefixed "-"; "…" marks text cut off by the diff viewer):

-import os
 import torch
 import torch.nn.functional as F
 from transformers import AutoTokenizer
-from evo_model import …
-from evo_architecture import …
-import …
-import …
 import psutil
 import platform
-import …
-openai.api_key = os.getenv("OPENAI_API_KEY")
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-… (28 removed lines cut off by the viewer)
-def is_fact_or_math(q):
-    q_lower = q.lower()
-    return any(char.isdigit() for char in q_lower) or any(op in q_lower for op in ["+", "-", "*", "/", "=", "what is", "solve", "calculate"])
-
-if is_fact_or_math(query):
-    context_str = user_context or ""
-else:
-    search_results = web_search(query)
-    context_str = "\n".join(search_results + ([user_context] if user_context else []))
-
-input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]
-scores = []
-
-for pair in input_pairs:
-    encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
-    with torch.no_grad():
-        logits = model(encoded["input_ids"])
-        score = torch.sigmoid(logits).item()
-    scores.append(score)
-
-best_idx = int(scores[1] > scores[0])
-return (
-    options[best_idx],
-    max(scores),
-    f"{options[0]}: {scores[0]:.3f} vs {options[1]}: {scores[1]:.3f}",
-    context_str
-)
 try:
-    response = openai.chat.completions.create(
         model="gpt-3.5-turbo",
-        messages=[{"role": "user", "content": …
-        temperature=0.7,
     )
-    return …
 except Exception as e:
-    return f"…
-
-# 🧠 Live Evo prediction logic
-def evo_chat_predict(history, query, options):
-    try:
-        if isinstance(history, list):
-            context = "\n".join(history[-6:])
-        elif hasattr(history, "empty") and not history.empty:
-            context = "\n".join(history.tail(6).astype(str).tolist())
-        else:
-            context = ""
-    except Exception:
-        context = ""
     return {
-        "…
-        "…
-        "…
-        "…
     }
-
-# 📊 Evo model config metadata
-def get_model_config():
-    return current_config
-
-# 🖥️ Runtime stats
 def get_system_stats():
-    …
     return {
-        "device": …
-        "cpu_usage_percent": …
-        "memory_used_gb": round(…
-        "memory_total_gb": round(…
-        "gpu_name": …
-        "…
-        "…
         "platform": platform.platform()
     }
-
-# 🔁 Retrain from feedback_log.csv and evolve architecture
 def retrain_from_feedback_csv():
-    …
-    if not os.path.exists(path):
-        return "⚠️ No feedback_log.csv found."
-
-    with open(…
         reader = csv.DictReader(f)
         for row in reader:
-            …
-            vote = row.get("user_preference", "").lower()
-            correct = row.get("evo_was_correct", "").lower()
-            if vote == "evo" or correct == "yes":
-                feedback_data.append((q, o1, o2, o2))  # Evo was correct
-            elif vote == "gpt":
-                feedback_data.append((q, o1, o2, o1))  # Evo was wrong
-
-    if not …
     return "⚠️ No usable feedback data."
-    # …
-    current_config …
     model.train()
     optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
-    …
-    input_text = f"{question} [SEP] {opt2 if label.item() == 1 else opt1}"
-    encoded = tokenizer(input_text, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
-    logits = model(encoded["input_ids"])
-    loss = F.binary_cross_entropy_with_logits(logits.squeeze(dim=-1), label)
-    loss.backward()
-    optimizer.step()
-    optimizer.zero_grad()
-
-    torch.save(model.state_dict(), MODEL_PATH)
-    log_genome(current_config)
-    return "✅ Evo mutated, retrained, and saved."
New version (added lines are prefixed "+"):

 import torch
 import torch.nn.functional as F
 from transformers import AutoTokenizer
+from evo_model import EvoTransformerV22
+from evo_architecture import build_model_from_config, mutate_genome, log_genome
+import random
+import csv
+import os
 import psutil
 import platform
+import GPUtil
+import openai

+# Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = EvoTransformerV22().to(device)
+model.eval()
+
+current_config = {
+    "d_model": 512,
+    "num_heads": 8,
+    "ffn_dim": 1024,
+    "num_layers": 6,
+    "memory_enabled": True
+}
+FEEDBACK_LOG = "feedback_log.csv"
+
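Note: current_config doubles as the architecture genome that retrain_from_feedback_csv (below) perturbs via mutate_genome and rebuilds via build_model_from_config. Both functions live in evo_architecture.py, which this commit does not touch, so only their contract is visible here: take a config dict, return a mutated copy with the same keys. A purely illustrative sketch of that contract (the mutation choices below are invented, not the Space's real logic):

    import random

    def mutate_genome_sketch(config):
        # Hypothetical stand-in for evo_architecture.mutate_genome.
        g = dict(config)
        g["num_layers"] = max(2, g["num_layers"] + random.choice([-1, 0, 1]))
        g["num_heads"] = random.choice([4, 8])       # must divide d_model (512)
        g["ffn_dim"] = random.choice([512, 1024, 2048])
        g["memory_enabled"] = random.random() < 0.5
        return g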
+def evo_chat_predict(history, question, options):
+    combined_inputs = [f"{question} {opt}" for opt in options]
+    encodings = tokenizer(combined_inputs, padding=True, truncation=True, max_length=128, return_tensors="pt").to(device)
+    with torch.no_grad():
+        logits = model(encodings["input_ids"])
+    probs = torch.sigmoid(logits).squeeze().tolist()
+    best_idx = int(torch.argmax(torch.tensor(probs)))
+    reasoning = f"{options[0]}: {probs[0]:.3f} vs {options[1]}: {probs[1]:.3f}"
+    return {
+        "answer": options[best_idx],
+        "confidence": round(probs[best_idx], 3),
+        "reasoning": reasoning,
+        "context_used": question
+    }
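Unlike the removed implementation, the new evo_chat_predict ignores history and all web-search context: it batch-encodes one "question option" string per option and picks whichever scores higher under the sigmoid. A minimal usage sketch (the question and options are invented for illustration; `inference` is simply this file's module name):

    from inference import evo_chat_predict

    result = evo_chat_predict(
        history=[],  # accepted but unused by the new implementation
        question="Should I unplug idle appliances to save energy?",
        options=["Yes, unplug them", "No, keep them plugged in"],
    )
    print(result["answer"], result["confidence"])  # e.g. "Yes, unplug them" 0.731
    print(result["reasoning"])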
+def get_gpt_response(prompt):
+    openai.api_key = os.getenv("OPENAI_API_KEY", "sk-...")
     try:
+        res = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": prompt}]
         )
+        return res.choices[0].message["content"]
     except Exception as e:
+        return f"(GPT Error) {e}"
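Note that openai.ChatCompletion.create is the pre-1.0 interface of the openai Python package; on openai>=1.0 the call raises an error pointing to the new client, so this commit implicitly assumes openai<1.0 is pinned in the Space's requirements (not shown here). For reference, a sketch of the same call against the 1.x client:

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    res = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(res.choices[0].message.content)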
+def get_model_config():
     return {
+        "num_layers": current_config["num_layers"],
+        "num_heads": current_config["num_heads"],
+        "ffn_dim": current_config["ffn_dim"],
+        "memory_enabled": current_config["memory_enabled"],
+        "accuracy": "N/A"
     }
 def get_system_stats():
+    mem = psutil.virtual_memory()
+    cpu = psutil.cpu_percent()
+    try:
+        gpus = GPUtil.getGPUs()
+        gpu = gpus[0] if gpus else None
+        gpu_name = gpu.name if gpu else "N/A"
+        gpu_mem_used = round(gpu.memoryUsed / 1024, 2) if gpu else 0
+        gpu_mem_total = round(gpu.memoryTotal / 1024, 2) if gpu else 0
+    except:
+        gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0
+
     return {
+        "device": device.type,
+        "cpu_usage_percent": cpu,
+        "memory_used_gb": round(mem.used / 1024**3, 2),
+        "memory_total_gb": round(mem.total / 1024**3, 2),
+        "gpu_name": gpu_name,
+        "gpu_memory_used_gb": gpu_mem_used,
+        "gpu_memory_total_gb": gpu_mem_total,
         "platform": platform.platform()
     }
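GPUtil gathers its numbers by shelling out to nvidia-smi, so on CPU-only Spaces hardware getGPUs() comes back empty and the stats fall back to "N/A"/0 (GPUtil reports memoryUsed/memoryTotal in MB, hence the /1024 to get GB). Quick check:

    import GPUtil
    print(GPUtil.getGPUs())  # [] on machines without an NVIDIA GPU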
 def retrain_from_feedback_csv():
+    if not os.path.exists(FEEDBACK_LOG):
+        return "⚠️ No feedback log file found."

+    data = []
+    with open(FEEDBACK_LOG, "r", encoding="utf-8") as f:
         reader = csv.DictReader(f)
         for row in reader:
+            if row.get("vote") in ["Evo", "GPT"]:
+                label = 1 if row["vote"] == "Evo" else 0
+                input_text = f"{row['question']} {row['option1']} {row['option2']}"
+                data.append((input_text, label))

+    if not data:
         return "⚠️ No usable feedback data."

+    # Mutation logic
+    global current_config, model
+    new_config = mutate_genome(current_config)
+    model = build_model_from_config(new_config).to(device)
+    current_config = new_config
+    log_genome(new_config)
+
+    # Retrain logic
     model.train()
     optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+    for epoch in range(3):
+        random.shuffle(data)
+        total_loss = 0.0
+        for text, label in data:
+            enc = tokenizer(text, padding="max_length", truncation=True, max_length=128, return_tensors="pt").to(device)
+            input_ids = enc["input_ids"]
+            label_tensor = torch.tensor([label], dtype=torch.float32).to(device)
+            logits = model(input_ids)
+            loss = F.binary_cross_entropy_with_logits(logits.squeeze(), label_tensor)
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+            total_loss += loss.item()
+    model.eval()
+    return f"✅ Evo retrained on {len(data)} feedback entries."
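The feedback reader above expects four columns per row: question, option1, option2, and vote (where vote is "Evo" or "GPT"; any other value is skipped). A minimal sketch of a compatible logger, with log_feedback being a hypothetical helper name rather than anything in this repo:

    import csv
    import os

    def log_feedback(question, option1, option2, vote):
        fieldnames = ["question", "option1", "option2", "vote"]
        write_header = not os.path.exists("feedback_log.csv")
        with open("feedback_log.csv", "a", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            if write_header:
                writer.writeheader()
            writer.writerow({"question": question, "option1": option1,
                             "option2": option2, "vote": vote})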

+def load_model(force_reload=False):
+    global model
+    model.eval()
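As committed, load_model ignores its force_reload argument and only flips the in-memory model back to eval mode; nothing is re-read from disk. Separately, every call site in this file assumes the Evo models map a (batch, seq_len) tensor of token IDs to one logit per sequence. Neither EvoTransformerV22 nor the models built by build_model_from_config are shown in this commit, so the stub below only pins down that assumed interface for local testing; it is not the Space's real model:

    import torch
    import torch.nn as nn

    class FakeEvo(nn.Module):
        # Stand-in matching the assumed call signature:
        # (batch, seq_len) int ids -> (batch, 1) float logits.
        def __init__(self, vocab_size=30522, d_model=512):  # 30522 = bert-base-uncased vocab
            super().__init__()
            self.embed = nn.Embedding(vocab_size, d_model)
            self.head = nn.Linear(d_model, 1)

        def forward(self, input_ids):
            pooled = self.embed(input_ids).mean(dim=1)  # mean-pool over tokens
            return self.head(pooled)

    logits = FakeEvo()(torch.randint(0, 30522, (2, 128)))
    print(logits.shape)  # torch.Size([2, 1])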