# EvoPlatformV3 / inference.py
import csv
import os
import platform
import random

import GPUtil
import openai
import psutil
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer

from evo_architecture import build_model_from_config, log_genome, mutate_genome
from evo_model import EvoTransformerV22
# Load tokenizer and instantiate the Evo model on the available device.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EvoTransformerV22().to(device)
model.eval()
# Genome/config for the current Evo architecture; mutated on each retrain.
current_config = {
    "d_model": 512,
    "num_heads": 8,
    "ffn_dim": 1024,
    "num_layers": 6,
    "memory_enabled": True,
}
FEEDBACK_LOG = "feedback_log.csv"
def evo_chat_predict(history, question, options):
    """Score each answer option against the question and pick the best one."""
    combined_inputs = [f"{question} {opt}" for opt in options]
    encodings = tokenizer(
        combined_inputs, padding=True, truncation=True, max_length=128, return_tensors="pt"
    ).to(device)
    with torch.no_grad():
        logits = model(encodings["input_ids"])
    # One sigmoid score per option; keep a 1-D list even for a single option.
    probs = torch.sigmoid(logits).squeeze(-1).tolist()
    if isinstance(probs, float):
        probs = [probs]
    best_idx = max(range(len(probs)), key=probs.__getitem__)
    reasoning = " vs ".join(f"{opt}: {p:.3f}" for opt, p in zip(options, probs))
    return {
        "answer": options[best_idx],
        "confidence": round(probs[best_idx], 3),
        "reasoning": reasoning,
        "context_used": question,
    }
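# A minimal usage sketch (the question and options are made up for
# illustration; the confidence value shown is a placeholder):
#
#   result = evo_chat_predict(
#       history=[],
#       question="To start a fire you should",
#       options=["strike a match", "pour water on the wood"],
#   )
#   # result -> {"answer": "strike a match", "confidence": 0.87, ...}
#
# `history` is accepted for interface compatibility but is not used in scoring.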
def get_gpt_response(prompt):
    """Query the OpenAI chat API as a baseline to compare against Evo."""
    try:
        # The client reads OPENAI_API_KEY from the environment; avoid
        # hard-coding a placeholder key here.
        client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"(GPT Error) {e}"
def get_model_config():
    return {
        "num_layers": current_config["num_layers"],
        "num_heads": current_config["num_heads"],
        "ffn_dim": current_config["ffn_dim"],
        "memory_enabled": current_config["memory_enabled"],
        "accuracy": "N/A",
    }
def get_system_stats():
    mem = psutil.virtual_memory()
    cpu = psutil.cpu_percent()
    try:
        gpus = GPUtil.getGPUs()
        gpu = gpus[0] if gpus else None
        gpu_name = gpu.name if gpu else "N/A"
        # GPUtil reports memory in MB; convert to GB.
        gpu_mem_used = round(gpu.memoryUsed / 1024, 2) if gpu else 0
        gpu_mem_total = round(gpu.memoryTotal / 1024, 2) if gpu else 0
    except Exception:
        gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0
    return {
        "device": device.type,
        "cpu_usage_percent": cpu,
        "memory_used_gb": round(mem.used / 1024**3, 2),
        "memory_total_gb": round(mem.total / 1024**3, 2),
        "gpu_name": gpu_name,
        "gpu_memory_used_gb": gpu_mem_used,
        "gpu_memory_total_gb": gpu_mem_total,
        "platform": platform.platform(),
    }
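# Illustrative return value on a CPU-only host (all numbers are placeholders,
# not real measurements):
#
#   {"device": "cpu", "cpu_usage_percent": 12.5,
#    "memory_used_gb": 3.2, "memory_total_gb": 16.0,
#    "gpu_name": "N/A", "gpu_memory_used_gb": 0, "gpu_memory_total_gb": 0,
#    "platform": "Linux-5.15..."}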
def retrain_from_feedback_csv():
    if not os.path.exists(FEEDBACK_LOG):
        return "⚠️ No feedback log file found."
    data = []
    with open(FEEDBACK_LOG, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            vote = row.get("user_preference") or row.get("vote")
            if vote in ["Evo", "GPT"]:
                label = 1 if vote == "Evo" else 0
                input_text = f"{row['question']} {row['option1']} {row['option2']}"
                data.append((input_text, label))
    if not data:
        return "⚠️ No usable feedback data."

    # Mutation logic: evolve the genome and rebuild the model before retraining.
    global current_config, model
    new_config = mutate_genome(current_config)
    model = build_model_from_config(new_config).to(device)
    current_config = new_config
    log_genome(new_config)

    # Retrain logic: one example per step (batch size 1); the binary label is
    # 1 when the user preferred Evo, 0 when the user preferred GPT.
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    for epoch in range(3):
        random.shuffle(data)
        for text, label in data:
            enc = tokenizer(
                text, padding="max_length", truncation=True, max_length=128, return_tensors="pt"
            ).to(device)
            input_ids = enc["input_ids"]
            label_tensor = torch.tensor([label], dtype=torch.float32).to(device)
            logits = model(input_ids)
            # Flatten logits to match the 1-D label tensor, whether the model
            # returns shape (1,) or (1, 1).
            loss = F.binary_cross_entropy_with_logits(logits.view(-1), label_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    model.eval()
    return f"✅ Evo retrained on {len(data)} feedback entries."
def load_model(force_reload=False):
    # No checkpoint reload is implemented yet; `force_reload` is accepted
    # for API compatibility, and the in-memory model is just set to eval mode.
    global model
    model.eval()
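# A minimal smoke test, runnable as `python inference.py`. It assumes the
# evo_model / evo_architecture modules are importable in this environment;
# the question and options below are made up for illustration.
if __name__ == "__main__":
    demo = evo_chat_predict(
        history=[],
        question="To cross a river without a bridge you should",
        options=["use a boat", "close your eyes"],
    )
    print(demo["answer"], demo["confidence"], demo["reasoning"])
    print(get_model_config())
    print(get_system_stats())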