HemanM committed
Commit a499eab · verified · 1 Parent(s): 57ea867

Update inference.py

Files changed (1)
  1. inference.py +100 -145
inference.py CHANGED
@@ -1,176 +1,131 @@
- import os
  import torch
  import torch.nn.functional as F
  from transformers import AutoTokenizer
- from evo_model import build_model_from_config
- from evo_architecture import mutate_genome, default_config, log_genome
- from search_utils import web_search
- import openai
- import time
  import psutil
  import platform
- import csv
-
- openai.api_key = os.getenv("OPENAI_API_KEY")

- MODEL_PATH = "evo_hellaswag.pt"
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
- model = None
- last_mod_time = 0
- current_config = default_config()
-
-
- # 🔁 Load Evo model with auto-reload and dynamic config
- def load_model(force_reload=False):
-     global model, last_mod_time, current_config
-     try:
-         current_mod_time = os.path.getmtime(MODEL_PATH)
-         if model is None or force_reload or current_mod_time > last_mod_time:
-             model = build_model_from_config(current_config)
-             model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))
-             model.eval()
-             last_mod_time = current_mod_time
-             print("✅ Evo model loaded.")
-     except Exception as e:
-         print(f"❌ Error loading Evo model: {e}")
-         model = None
-     return model
-
-
- # 🔮 Evo inference core logic
- def evo_infer(query, options, user_context=""):
-     model = load_model()
-     if model is None:
-         return "Model Error", 0.0, "Model not available", ""
-
-     def is_fact_or_math(q):
-         q_lower = q.lower()
-         return any(char.isdigit() for char in q_lower) or any(op in q_lower for op in ["+", "-", "*", "/", "=", "what is", "solve", "calculate"])
-
-     if is_fact_or_math(query):
-         context_str = user_context or ""
-     else:
-         search_results = web_search(query)
-         context_str = "\n".join(search_results + ([user_context] if user_context else []))
-
-     input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]
-     scores = []
-
-     for pair in input_pairs:
-         encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
-         with torch.no_grad():
-             logits = model(encoded["input_ids"])
-             score = torch.sigmoid(logits).item()
-         scores.append(score)
-
-     best_idx = int(scores[1] > scores[0])
-     return (
-         options[best_idx],
-         max(scores),
-         f"{options[0]}: {scores[0]:.3f} vs {options[1]}: {scores[1]:.3f}",
-         context_str
-     )
-

- # 🤖 GPT fallback (for comparison)
- def get_gpt_response(query, user_context=""):
      try:
-         context_block = f"\n\nContext:\n{user_context}" if user_context else ""
-         response = openai.chat.completions.create(
              model="gpt-3.5-turbo",
-             messages=[{"role": "user", "content": query + context_block}],
-             temperature=0.7,
          )
-         return response.choices[0].message.content.strip()
      except Exception as e:
-         return f"⚠️ GPT error:\n{str(e)}"
-
-
- # 🧠 Live Evo prediction logic
- def evo_chat_predict(history, query, options):
-     try:
-         if isinstance(history, list):
-             context = "\n".join(history[-6:])
-         elif hasattr(history, "empty") and not history.empty:
-             context = "\n".join(history.tail(6).astype(str).tolist())
-         else:
-             context = ""
-     except Exception:
-         context = ""

-     evo_ans, evo_score, evo_reason, evo_ctx = evo_infer(query, options, context)
      return {
-         "answer": evo_ans,
-         "confidence": round(evo_score, 3),
-         "reasoning": evo_reason,
-         "context_used": evo_ctx
      }

-
- # 📊 Evo model config metadata
- def get_model_config():
-     return current_config
-
-
- # 🖥️ Runtime stats
  def get_system_stats():
-     gpu_info = torch.cuda.get_device_properties(0) if torch.cuda.is_available() else None
-     memory = psutil.virtual_memory()
      return {
-         "device": "GPU" if torch.cuda.is_available() else "CPU",
-         "cpu_usage_percent": psutil.cpu_percent(),
-         "memory_used_gb": round(memory.used / (1024 ** 3), 2),
-         "memory_total_gb": round(memory.total / (1024 ** 3), 2),
-         "gpu_name": gpu_info.name if gpu_info else "N/A",
-         "gpu_memory_total_gb": round(gpu_info.total_memory / (1024 ** 3), 2) if gpu_info else "N/A",
-         "gpu_memory_used_gb": round(torch.cuda.memory_allocated() / (1024 ** 3), 2) if gpu_info else "N/A",
          "platform": platform.platform()
      }

-
- # 🔁 Retrain from feedback_log.csv and evolve architecture
  def retrain_from_feedback_csv():
-     global current_config, model
-     path = "feedback_log.csv"
-     if not os.path.exists(path):
-         return "⚠️ No feedback_log.csv found."

-     feedback_data = []
-     with open(path, newline='', encoding="utf-8") as f:
          reader = csv.DictReader(f)
          for row in reader:
-             q = row["question"]
-             o1 = row["option1"]
-             o2 = row["option2"]
-             ctx = row["context"]
-             vote = row.get("user_preference", "").lower()
-             correct = row.get("evo_was_correct", "").lower()
-             if vote == "evo" or correct == "yes":
-                 feedback_data.append((q, o1, o2, o2))  # Evo was correct
-             elif vote == "gpt":
-                 feedback_data.append((q, o1, o2, o1))  # Evo was wrong

-     if not feedback_data:
          return "⚠️ No usable feedback data."

-     # Mutate and retrain new model
-     current_config = mutate_genome(current_config)
-     model = build_model_from_config(current_config)
      model.train()
      optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

-     for row in feedback_data:
-         question, opt1, opt2, answer = row
-         label = torch.tensor([1.0 if answer.strip() == opt2.strip() else 0.0])
-         input_text = f"{question} [SEP] {opt2 if label.item() == 1 else opt1}"
-         encoded = tokenizer(input_text, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
-
-         logits = model(encoded["input_ids"])
-         loss = F.binary_cross_entropy_with_logits(logits.squeeze(dim=-1), label)
-         loss.backward()
-         optimizer.step()
-         optimizer.zero_grad()
-
-     torch.save(model.state_dict(), MODEL_PATH)
-     log_genome(current_config)
-     return "✅ Evo mutated, retrained, and saved."
  import torch
  import torch.nn.functional as F
  from transformers import AutoTokenizer
+ from evo_model import EvoTransformerV22
+ from evo_architecture import build_model_from_config, mutate_genome, log_genome
+ import random
+ import csv
+ import os
  import psutil
  import platform
+ import GPUtil
+ import openai

+ # Load tokenizer
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model = EvoTransformerV22().to(device)
+ model.eval()
+
+ current_config = {
+     "d_model": 512,
+     "num_heads": 8,
+     "ffn_dim": 1024,
+     "num_layers": 6,
+     "memory_enabled": True
+ }
+ FEEDBACK_LOG = "feedback_log.csv"
+
+ def evo_chat_predict(history, question, options):
+     combined_inputs = [f"{question} {opt}" for opt in options]
+     encodings = tokenizer(combined_inputs, padding=True, truncation=True, max_length=128, return_tensors="pt").to(device)
+     with torch.no_grad():
+         logits = model(encodings["input_ids"])
+     probs = torch.sigmoid(logits).squeeze().tolist()
+     best_idx = int(torch.argmax(torch.tensor(probs)))
+     reasoning = f"{options[0]}: {probs[0]:.3f} vs {options[1]}: {probs[1]:.3f}"
+     return {
+         "answer": options[best_idx],
+         "confidence": round(probs[best_idx], 3),
+         "reasoning": reasoning,
+         "context_used": question
+     }

+ def get_gpt_response(prompt):
+     openai.api_key = os.getenv("OPENAI_API_KEY", "sk-...")
      try:
+         # Legacy (pre-1.0) openai SDK call interface
+         res = openai.ChatCompletion.create(
              model="gpt-3.5-turbo",
+             messages=[{"role": "user", "content": prompt}]
          )
+         return res.choices[0].message["content"]
      except Exception as e:
+         return f"(GPT Error) {e}"

+ def get_model_config():
      return {
+         "num_layers": current_config["num_layers"],
+         "num_heads": current_config["num_heads"],
+         "ffn_dim": current_config["ffn_dim"],
+         "memory_enabled": current_config["memory_enabled"],
+         "accuracy": "N/A"
      }

  def get_system_stats():
+     mem = psutil.virtual_memory()
+     cpu = psutil.cpu_percent()
+     try:
+         gpus = GPUtil.getGPUs()
+         gpu = gpus[0] if gpus else None
+         gpu_name = gpu.name if gpu else "N/A"
+         # GPUtil reports memory in MB; convert to GB
+         gpu_mem_used = round(gpu.memoryUsed / 1024, 2) if gpu else 0
+         gpu_mem_total = round(gpu.memoryTotal / 1024, 2) if gpu else 0
+     except Exception:
+         gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0
+
      return {
+         "device": device.type,
+         "cpu_usage_percent": cpu,
+         "memory_used_gb": round(mem.used / 1024**3, 2),
+         "memory_total_gb": round(mem.total / 1024**3, 2),
+         "gpu_name": gpu_name,
+         "gpu_memory_used_gb": gpu_mem_used,
+         "gpu_memory_total_gb": gpu_mem_total,
          "platform": platform.platform()
      }

  def retrain_from_feedback_csv():
+     if not os.path.exists(FEEDBACK_LOG):
+         return "⚠️ No feedback log file found."

+     data = []
+     with open(FEEDBACK_LOG, "r", encoding="utf-8") as f:
          reader = csv.DictReader(f)
          for row in reader:
+             if row.get("vote") in ["Evo", "GPT"]:
+                 label = 1 if row["vote"] == "Evo" else 0
+                 input_text = f"{row['question']} {row['option1']} {row['option2']}"
+                 data.append((input_text, label))

+     if not data:
          return "⚠️ No usable feedback data."

+     # Mutation logic
+     global current_config, model
+     new_config = mutate_genome(current_config)
+     model = build_model_from_config(new_config).to(device)
+     current_config = new_config
+     log_genome(new_config)
+
+     # Retrain logic
      model.train()
      optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+     for epoch in range(3):
+         random.shuffle(data)
+         total_loss = 0.0
+         for text, label in data:
+             enc = tokenizer(text, padding="max_length", truncation=True, max_length=128, return_tensors="pt").to(device)
+             input_ids = enc["input_ids"]
+             label_tensor = torch.tensor([label], dtype=torch.float32).to(device)
+             logits = model(input_ids)
+             # view(-1) keeps a [1]-shaped tensor so it matches label_tensor
+             loss = F.binary_cross_entropy_with_logits(logits.view(-1), label_tensor)
+             optimizer.zero_grad()
+             loss.backward()
+             optimizer.step()
+             total_loss += loss.item()
+     model.eval()
+     return f"✅ Evo retrained on {len(data)} feedback entries."

+ def load_model(force_reload=False):
+     # Kept for API compatibility; the model is instantiated once at import time.
+     global model
+     model.eval()
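
For reference, a minimal sketch of how the updated module might be exercised end to end. It assumes this file is importable as inference and that the evo_model, evo_architecture, and GPUtil dependencies are available; the feedback row is hypothetical, written in the schema retrain_from_feedback_csv reads (vote, question, option1, option2).

import csv

from inference import evo_chat_predict, get_model_config, retrain_from_feedback_csv

# One hypothetical feedback row; "Evo" maps to label 1, "GPT" to label 0.
with open("feedback_log.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["vote", "question", "option1", "option2"])
    writer.writeheader()
    writer.writerow({
        "vote": "Evo",
        "question": "The chef preheats the oven.",
        "option1": "He freezes the dough.",
        "option2": "He slides the tray in.",
    })

print(retrain_from_feedback_csv())   # mutates the genome, retrains; note nothing is saved to disk
print(get_model_config())            # current genome: layers, heads, ffn_dim, memory flag

result = evo_chat_predict(
    history=[],                      # accepted but unused in this version
    question="The chef preheats the oven.",
    options=["He freezes the dough.", "He slides the tray in."],
)
print(result["answer"], result["confidence"], result["reasoning"])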