Spaces:
Sleeping
Sleeping
File size: 4,914 Bytes
1c8999c 785c4f7 f7f9a8a 11f2d5b a499eab 1c8999c a499eab 0067953 a499eab 1c8999c 70b5bb7 1c8999c f7f9a8a a499eab 1c8999c a499eab 1c8999c a499eab 1c8999c a499eab 1c8999c a499eab fca1cb1 1c8999c a499eab 5b3d26d 777a225 5b3d26d a499eab 5b3d26d 777a225 5b3d26d a499eab 14bd5d8 1c8999c a499eab cdd53f3 1c8999c cdd53f3 8ed51aa 1c8999c 0067953 a499eab 0067953 a499eab 0067953 b50967b 1c8999c fca1cb1 1c8999c 95db7be 1c8999c 95db7be 1c8999c 95db7be 1c8999c 95db7be 1c8999c 95db7be 1c8999c fca1cb1 1c8999c a499eab 1c8999c fca1cb1 a499eab fca1cb1 1c8999c a499eab 1c8999c b50967b a499eab 1c8999c a499eab 1c8999c a499eab 1c8999c a499eab b50967b 1c8999c a499eab 1c8999c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# inference.py
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer
from evo_model import EvoTransformerV22
from evo_architecture import (
build_model_from_config,
mutate_genome,
log_genome,
save_best_genome,
load_best_genome
)
import random
import csv
import os
import psutil
import platform
import GPUtil
import openai
import pandas as pd
# Load the OpenAI API key from the environment.
# NOTE(review): the "sk-..." fallback looks like a placeholder rather than a
# usable key - confirm OPENAI_API_KEY is set wherever this module runs.
openai.api_key = os.getenv("OPENAI_API_KEY", "sk-...")
# Runtime setup: shared tokenizer and compute device (CUDA when available).
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mutable model and config: both names are rebound by
# retrain_from_feedback_csv(), so other functions read them at call time.
current_config = load_best_genome()
model = build_model_from_config(current_config).to(device)
model.eval()
# CSV file that retrain_from_feedback_csv() reads votes from.
FEEDBACK_LOG = "feedback_log.csv"
# Evo prediction
def evo_chat_predict(history, question, options):
    """Score every candidate answer with the Evo model and pick the best one.

    Each option is paired with the question as ``"{question} {option}"``,
    the pairs are tokenized as one batch, and the module-level ``model`` is
    run on the resulting ``input_ids``. A sigmoid is applied to each logit
    and the option with the highest score is returned.

    Args:
        history: Accepted for interface compatibility; not used by this
            function.
        question: The question text.
        options: Non-empty sequence of candidate answer strings.

    Returns:
        A dict with the keys ``"answer"`` (the winning option),
        ``"confidence"`` (its score rounded to three decimals),
        ``"reasoning"`` (``"<option>: <score>"`` entries joined by ``" vs "``)
        and ``"context_used"`` (the question).

    Raises:
        ValueError: If ``options`` is empty.
    """
    if not options:
        raise ValueError("options must contain at least one candidate answer")

    inputs = [f"{question} {opt}" for opt in options]
    enc = tokenizer(
        inputs,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        logits = model(enc["input_ids"])
        # reshape(-1) instead of squeeze(): squeeze() collapses a one-option
        # batch to a 0-d tensor whose tolist() is a bare float, which cannot
        # be indexed below.
        # NOTE(review): assumes the model emits one logit per input row -
        # confirm against EvoTransformerV22.
        scores = torch.sigmoid(logits).reshape(-1)

    probs = scores.tolist()
    best_idx = int(torch.argmax(scores))

    # Report every option; for exactly two options this produces the same
    # "<a>: x vs <b>: y" text as before.
    reasoning = " vs ".join(
        f"{opt}: {prob:.3f}" for opt, prob in zip(options, probs)
    )

    return {
        "answer": options[best_idx],
        "confidence": round(probs[best_idx], 3),
        "reasoning": reasoning,
        "context_used": question,
    }
# GPT comparison
def get_gpt_response(prompt):
    """Send ``prompt`` to ``gpt-3.5-turbo`` and return the stripped reply text.

    Best-effort by design: any exception raised while creating the client,
    calling the API, or reading the reply is converted into a
    ``"(GPT Error) ..."`` string instead of propagating to the caller.
    """
    try:
        completions_api = openai.OpenAI().chat.completions
        completion = completions_api.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
        )
        first_choice = completion.choices[0]
        reply_text = first_choice.message.content
        return reply_text.strip()
    except Exception as err:
        return f"(GPT Error) {err}"
# Evo stats
def get_model_config():
    """Return a display summary of the active genome.

    Reads the module-level ``current_config`` and substitutes a placeholder
    for every key that is missing from it: ``"?"`` for the architecture
    fields and ``"N/A"`` for ``"accuracy"``.
    """
    # (key, placeholder) pairs, in the order they appear in the result.
    fields_with_fallbacks = (
        ("num_layers", "?"),
        ("num_heads", "?"),
        ("ffn_dim", "?"),
        ("memory_enabled", "?"),
        ("accuracy", "N/A"),
    )
    return {
        field: current_config.get(field, fallback)
        for field, fallback in fields_with_fallbacks
    }
# System info
def get_system_stats():
    """Collect a snapshot of host CPU, memory and GPU usage.

    GPU details are best-effort: when no GPU is reported, or when querying
    GPUtil fails for any ordinary reason, the GPU fields fall back to
    ``"N/A"`` / ``0`` rather than raising.

    Returns:
        A dict with the keys ``"device"``, ``"cpu_usage_percent"``,
        ``"memory_used_gb"``, ``"memory_total_gb"``, ``"gpu_name"``,
        ``"gpu_memory_used_gb"``, ``"gpu_memory_total_gb"`` and
        ``"platform"``.
    """
    mem = psutil.virtual_memory()
    cpu = psutil.cpu_percent()

    gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0
    try:
        gpus = GPUtil.getGPUs()
        if gpus:
            gpu = gpus[0]  # only the first GPU is reported
            gpu_name = gpu.name
            # NOTE(review): dividing by 1024 presumes GPUtil reports MiB -
            # confirm against the GPUtil documentation.
            gpu_mem_used = round(gpu.memoryUsed / 1024, 2)
            gpu_mem_total = round(gpu.memoryTotal / 1024, 2)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # still propagate. Reset all three fields so a failure part-way
        # through never leaves a mixed result.
        gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0

    return {
        "device": device.type,
        "cpu_usage_percent": cpu,
        "memory_used_gb": round(mem.used / 1024**3, 2),
        "memory_total_gb": round(mem.total / 1024**3, 2),
        "gpu_name": gpu_name,
        "gpu_memory_used_gb": gpu_mem_used,
        "gpu_memory_total_gb": gpu_mem_total,
        "platform": platform.platform(),
    }
# Retrain from feedback
def retrain_from_feedback_csv():
    """Mutate the current genome and fine-tune the new model on logged votes.

    Reads ``FEEDBACK_LOG``, keeps the rows whose ``vote`` is ``"Evo"`` or
    ``"GPT"``, and builds one training example per row: the text is
    ``"{question} {option1} {option2}"`` and the label is 1 for an ``"Evo"``
    vote, 0 otherwise. A mutated genome is built into a fresh model, trained
    for three epochs with Adam (lr=1e-4) one example at a time, and only
    then published as the module-level ``model`` / ``current_config``.

    Returns:
        A human-readable status string: a warning when there is no log or no
        usable vote, otherwise a confirmation with the number of examples.

    Raises:
        KeyError: If the log lacks a ``question``, ``option1`` or ``option2``
            column.
    """
    global current_config, model

    no_usable_data = "⚠️ No usable feedback data. Please vote on Evo or GPT."

    if not os.path.exists(FEEDBACK_LOG):
        return "⚠️ No feedback log found."

    try:
        df = pd.read_csv(FEEDBACK_LOG)
    except pd.errors.EmptyDataError:
        # A zero-byte log exists but holds no votes; report it like any
        # other log without usable feedback instead of crashing.
        return no_usable_data

    if df.empty or "vote" not in df.columns:
        return no_usable_data

    # Missing (NaN) votes are dropped by the same filter as unknown values.
    df = df[df["vote"].isin(["Evo", "GPT"])]
    if df.empty:
        return no_usable_data

    # One (text, label) pair per remaining row.
    data = [
        (f"{question} {option1} {option2}", 1 if vote == "Evo" else 0)
        for question, option1, option2, vote in zip(
            df["question"], df["option1"], df["option2"], df["vote"]
        )
    ]

    # Build the mutated candidate locally; the live model stays untouched
    # (and in eval mode) until training has completed successfully.
    new_config = mutate_genome(current_config)
    candidate = build_model_from_config(new_config).to(device)
    log_genome(new_config)

    # Tokenize once up front: the encodings are identical in every epoch.
    examples = []
    for text, label in data:
        enc = tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt",
        ).to(device)
        label_tensor = torch.tensor([label], dtype=torch.float32, device=device)
        examples.append((enc["input_ids"], label_tensor))

    candidate.train()
    optimizer = torch.optim.Adam(candidate.parameters(), lr=1e-4)
    for _ in range(3):
        random.shuffle(examples)
        for input_ids, label_tensor in examples:
            # NOTE(review): squeeze(1) presumes the model returns a
            # (batch, 1) tensor - confirm against the model definition.
            logits = candidate(input_ids).squeeze(1)
            loss = F.binary_cross_entropy_with_logits(logits, label_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    candidate.eval()

    # Publish the trained candidate and persist its genome.
    model = candidate
    current_config = new_config
    save_best_genome({**new_config, "accuracy": "Live-Finetuned"})
    return f"✅ Evo retrained on {len(data)} feedback entries."
# Reload model
def load_model(force_reload=False):
    """Switch the module-level ``model`` to evaluation mode.

    ``force_reload`` is accepted for callers that pass it, but the body does
    not consult it: nothing is rebuilt or reloaded here.
    """
    # The name is only read, never rebound, so no ``global`` statement is
    # needed to reach the module-level model.
    model.eval()