Update inference.py
inference.py (CHANGED: +100, -145)
@@ -1,176 +1,131 @@
Old version (removed lines are prefixed "-"; "…" marks text cut off by the diff viewer):

-import os
 import torch
 import torch.nn.functional as F
 from transformers import AutoTokenizer
-from evo_model import …
-from evo_architecture import …
-import …
-import …
 import psutil
 import platform
-import …
-openai.api_key = os.getenv("OPENAI_API_KEY")
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-… (28 removed lines cut off by the viewer)
-def is_fact_or_math(q):
-    q_lower = q.lower()
-    return any(char.isdigit() for char in q_lower) or any(op in q_lower for op in ["+", "-", "*", "/", "=", "what is", "solve", "calculate"])
-
-if is_fact_or_math(query):
-    context_str = user_context or ""
-else:
-    search_results = web_search(query)
-    context_str = "\n".join(search_results + ([user_context] if user_context else []))
-
-input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]
-scores = []
-
-for pair in input_pairs:
-    encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
-    with torch.no_grad():
-        logits = model(encoded["input_ids"])
-        score = torch.sigmoid(logits).item()
-    scores.append(score)
-
-best_idx = int(scores[1] > scores[0])
-return (
-    options[best_idx],
-    max(scores),
-    f"{options[0]}: {scores[0]:.3f} vs {options[1]}: {scores[1]:.3f}",
-    context_str
-)
 try:
-    response = openai.chat.completions.create(
         model="gpt-3.5-turbo",
-        messages=[{"role": "user", "content": …
-        temperature=0.7,
     )
-    return …
 except Exception as e:
-    return f"…
-
-# 🧠 Live Evo prediction logic
-def evo_chat_predict(history, query, options):
-    try:
-        if isinstance(history, list):
-            context = "\n".join(history[-6:])
-        elif hasattr(history, "empty") and not history.empty:
-            context = "\n".join(history.tail(6).astype(str).tolist())
-        else:
-            context = ""
-    except Exception:
-        context = ""
     return {
-        "…
-        "…
-        "…
-        "…
     }
-
-# 📊 Evo model config metadata
-def get_model_config():
-    return current_config
-
-# 🖥️ Runtime stats
 def get_system_stats():
-    …
     return {
-        "device": …
-        "cpu_usage_percent": …
-        "memory_used_gb": round(…
-        "memory_total_gb": round(…
-        "gpu_name": …
-        "…
-        "…
         "platform": platform.platform()
     }
-
-# 🔁 Retrain from feedback_log.csv and evolve architecture
 def retrain_from_feedback_csv():
-    …
-    if not os.path.exists(path):
-        return "⚠️ No feedback_log.csv found."
-
-    with open(…
         reader = csv.DictReader(f)
         for row in reader:
-            …
-            vote = row.get("user_preference", "").lower()
-            correct = row.get("evo_was_correct", "").lower()
-            if vote == "evo" or correct == "yes":
-                feedback_data.append((q, o1, o2, o2))  # Evo was correct
-            elif vote == "gpt":
-                feedback_data.append((q, o1, o2, o1))  # Evo was wrong
-
-    if not …
     return "⚠️ No usable feedback data."
-    # …
-    current_config …
     model.train()
     optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
-    …
-    input_text = f"{question} [SEP] {opt2 if label.item() == 1 else opt1}"
-    encoded = tokenizer(input_text, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
-    logits = model(encoded["input_ids"])
-    loss = F.binary_cross_entropy_with_logits(logits.squeeze(dim=-1), label)
-    loss.backward()
-    optimizer.step()
-    optimizer.zero_grad()
-
-    torch.save(model.state_dict(), MODEL_PATH)
-    log_genome(current_config)
-    return "✅ Evo mutated, retrained, and saved."
New version (added lines are prefixed "+"):

 import torch
 import torch.nn.functional as F
 from transformers import AutoTokenizer
+from evo_model import EvoTransformerV22
+from evo_architecture import build_model_from_config, mutate_genome, log_genome
+import random
+import csv
+import os
 import psutil
 import platform
+import GPUtil
+import openai

+# Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = EvoTransformerV22().to(device)
+model.eval()
+
+current_config = {
+    "d_model": 512,
+    "num_heads": 8,
+    "ffn_dim": 1024,
+    "num_layers": 6,
+    "memory_enabled": True
+}
+FEEDBACK_LOG = "feedback_log.csv"
+
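Note: current_config doubles as the architecture genome that retrain_from_feedback_csv (below) perturbs via mutate_genome and rebuilds via build_model_from_config. Both functions live in evo_architecture.py, which this commit does not touch, so only their contract is visible here: take a config dict, return a mutated copy with the same keys. A purely illustrative sketch of that contract (the mutation choices below are invented, not the Space's real logic):

    import random

    def mutate_genome_sketch(config):
        # Hypothetical stand-in for evo_architecture.mutate_genome.
        g = dict(config)
        g["num_layers"] = max(2, g["num_layers"] + random.choice([-1, 0, 1]))
        g["num_heads"] = random.choice([4, 8])       # must divide d_model (512)
        g["ffn_dim"] = random.choice([512, 1024, 2048])
        g["memory_enabled"] = random.random() < 0.5
        return g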
+def evo_chat_predict(history, question, options):
+    combined_inputs = [f"{question} {opt}" for opt in options]
+    encodings = tokenizer(combined_inputs, padding=True, truncation=True, max_length=128, return_tensors="pt").to(device)
+    with torch.no_grad():
+        logits = model(encodings["input_ids"])
+    probs = torch.sigmoid(logits).squeeze().tolist()
+    best_idx = int(torch.argmax(torch.tensor(probs)))
+    reasoning = f"{options[0]}: {probs[0]:.3f} vs {options[1]}: {probs[1]:.3f}"
+    return {
+        "answer": options[best_idx],
+        "confidence": round(probs[best_idx], 3),
+        "reasoning": reasoning,
+        "context_used": question
+    }
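Unlike the removed implementation, the new evo_chat_predict ignores history and all web-search context: it batch-encodes one "question option" string per option and picks whichever scores higher under the sigmoid. A minimal usage sketch (the question and options are invented for illustration; `inference` is simply this file's module name):

    from inference import evo_chat_predict

    result = evo_chat_predict(
        history=[],  # accepted but unused by the new implementation
        question="Should I unplug idle appliances to save energy?",
        options=["Yes, unplug them", "No, keep them plugged in"],
    )
    print(result["answer"], result["confidence"])  # e.g. "Yes, unplug them" 0.731
    print(result["reasoning"])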
+def get_gpt_response(prompt):
+    openai.api_key = os.getenv("OPENAI_API_KEY", "sk-...")
     try:
+        res = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": prompt}]
         )
+        return res.choices[0].message["content"]
     except Exception as e:
+        return f"(GPT Error) {e}"
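Note that openai.ChatCompletion.create is the pre-1.0 interface of the openai Python package; on openai>=1.0 the call raises an error pointing to the new client, so this commit implicitly assumes openai<1.0 is pinned in the Space's requirements (not shown here). For reference, a sketch of the same call against the 1.x client:

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    res = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(res.choices[0].message.content)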
+def get_model_config():
     return {
+        "num_layers": current_config["num_layers"],
+        "num_heads": current_config["num_heads"],
+        "ffn_dim": current_config["ffn_dim"],
+        "memory_enabled": current_config["memory_enabled"],
+        "accuracy": "N/A"
     }
 def get_system_stats():
+    mem = psutil.virtual_memory()
+    cpu = psutil.cpu_percent()
+    try:
+        gpus = GPUtil.getGPUs()
+        gpu = gpus[0] if gpus else None
+        gpu_name = gpu.name if gpu else "N/A"
+        gpu_mem_used = round(gpu.memoryUsed / 1024, 2) if gpu else 0
+        gpu_mem_total = round(gpu.memoryTotal / 1024, 2) if gpu else 0
+    except:
+        gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0
+
     return {
+        "device": device.type,
+        "cpu_usage_percent": cpu,
+        "memory_used_gb": round(mem.used / 1024**3, 2),
+        "memory_total_gb": round(mem.total / 1024**3, 2),
+        "gpu_name": gpu_name,
+        "gpu_memory_used_gb": gpu_mem_used,
+        "gpu_memory_total_gb": gpu_mem_total,
         "platform": platform.platform()
     }
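GPUtil gathers its numbers by shelling out to nvidia-smi, so on CPU-only Spaces hardware getGPUs() comes back empty and the stats fall back to "N/A"/0 (GPUtil reports memoryUsed/memoryTotal in MB, hence the /1024 to get GB). Quick check:

    import GPUtil
    print(GPUtil.getGPUs())  # [] on machines without an NVIDIA GPU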
 def retrain_from_feedback_csv():
+    if not os.path.exists(FEEDBACK_LOG):
+        return "⚠️ No feedback log file found."

+    data = []
+    with open(FEEDBACK_LOG, "r", encoding="utf-8") as f:
         reader = csv.DictReader(f)
         for row in reader:
+            if row.get("vote") in ["Evo", "GPT"]:
+                label = 1 if row["vote"] == "Evo" else 0
+                input_text = f"{row['question']} {row['option1']} {row['option2']}"
+                data.append((input_text, label))

+    if not data:
         return "⚠️ No usable feedback data."

+    # Mutation logic
+    global current_config, model
+    new_config = mutate_genome(current_config)
+    model = build_model_from_config(new_config).to(device)
+    current_config = new_config
+    log_genome(new_config)
+
+    # Retrain logic
     model.train()
     optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+    for epoch in range(3):
+        random.shuffle(data)
+        total_loss = 0.0
+        for text, label in data:
+            enc = tokenizer(text, padding="max_length", truncation=True, max_length=128, return_tensors="pt").to(device)
+            input_ids = enc["input_ids"]
+            label_tensor = torch.tensor([label], dtype=torch.float32).to(device)
+            logits = model(input_ids)
+            loss = F.binary_cross_entropy_with_logits(logits.squeeze(), label_tensor)
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+            total_loss += loss.item()
+    model.eval()
+    return f"✅ Evo retrained on {len(data)} feedback entries."
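The feedback reader above expects four columns per row: question, option1, option2, and vote (where vote is "Evo" or "GPT"; any other value is skipped). A minimal sketch of a compatible logger, with log_feedback being a hypothetical helper name rather than anything in this repo:

    import csv
    import os

    def log_feedback(question, option1, option2, vote):
        fieldnames = ["question", "option1", "option2", "vote"]
        write_header = not os.path.exists("feedback_log.csv")
        with open("feedback_log.csv", "a", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            if write_header:
                writer.writeheader()
            writer.writerow({"question": question, "option1": option1,
                             "option2": option2, "vote": vote})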

+def load_model(force_reload=False):
+    global model
+    model.eval()
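As committed, load_model ignores its force_reload argument and only flips the in-memory model back to eval mode; nothing is re-read from disk. Separately, every call site in this file assumes the Evo models map a (batch, seq_len) tensor of token IDs to one logit per sequence. Neither EvoTransformerV22 nor the models built by build_model_from_config are shown in this commit, so the stub below only pins down that assumed interface for local testing; it is not the Space's real model:

    import torch
    import torch.nn as nn

    class FakeEvo(nn.Module):
        # Stand-in matching the assumed call signature:
        # (batch, seq_len) int ids -> (batch, 1) float logits.
        def __init__(self, vocab_size=30522, d_model=512):  # 30522 = bert-base-uncased vocab
            super().__init__()
            self.embed = nn.Embedding(vocab_size, d_model)
            self.head = nn.Linear(d_model, 1)

        def forward(self, input_ids):
            pooled = self.embed(input_ids).mean(dim=1)  # mean-pool over tokens
            return self.head(pooled)

    logits = FakeEvo()(torch.randint(0, 30522, (2, 128)))
    print(logits.shape)  # torch.Size([2, 1])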