HemanM committed on
Commit 8051dee · verified · 1 Parent(s): d023240

Update inference.py

Files changed (1)
inference.py  +33 -47
inference.py CHANGED
@@ -5,11 +5,10 @@ from transformers import AutoTokenizer
 from evo_model import EvoTransformerV22
 from search_utils import web_search
 import openai
-import time
 import psutil
 import platform
 
-# 🔐 Load OpenAI API Key securely
+# 🔐 Load OpenAI API Key
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
 # 📦 Constants
@@ -18,7 +17,7 @@ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 model = None
 last_mod_time = 0
 
-# 🔁 Reload model if updated on disk
+# 🔁 Load or reload Evo model
 def load_model():
     global model, last_mod_time
     try:
@@ -28,86 +27,73 @@ def load_model():
         model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))
         model.eval()
         last_mod_time = current_mod_time
-        print("🔁 Evo model reloaded.")
+        print("✅ Evo model loaded.")
     except Exception as e:
-        print(f"❌ Error loading Evo model: {e}")
+        print(f"❌ Error loading model: {e}")
         model = None
     return model
 
-# 🧠 Evo inference logic
-def get_evo_response(query, options, user_context=""):
+# 🧠 Evo response engine
+def evo_infer(question, option1, option2, user_context=""):
     model = load_model()
     if model is None:
-        return "Error", 0.0, "Model failed to load", ""
+        return "Evo failed", 0.0, "Model not loaded", ""
 
-    # Context = web + user
-    context_texts = web_search(query) + ([user_context] if user_context else [])
-    context_str = "\n".join(context_texts)
-    input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]
+    # 🌐 Retrieve context
+    context_blobs = web_search(question) + ([user_context] if user_context else [])
+    context = "\n".join(context_blobs)
 
-    # Score each option
+    # ⛓ Format input pairs
+    inputs = [f"{question} [SEP] {opt} [CTX] {context}" for opt in [option1, option2]]
     scores = []
-    for pair in input_pairs:
+
+    for pair in inputs:
         encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
         with torch.no_grad():
             logits = model(encoded["input_ids"])
            score = torch.sigmoid(logits).item()
         scores.append(score)
 
-    best_idx = int(scores[1] > scores[0])
-    return (
-        options[best_idx],  # ✅ Evo's best answer
-        max(scores),  # ✅ Confidence
-        f"{options[0]}: {scores[0]:.3f} vs {options[1]}: {scores[1]:.3f}",  # ✅ Reasoning trace
-        context_str  # ✅ Used context
-    )
+    best = option2 if scores[1] > scores[0] else option1
+    trace = f"{option1}: {scores[0]:.3f} vs {option2}: {scores[1]:.3f}"
+
+    return best, max(scores), trace, context
 
-# 🔄 GPT-3.5 response
-def get_gpt_response(query, user_context=""):
+# 🔄 GPT backup
+def gpt_infer(question, user_context=""):
     try:
-        context_block = f"\n\nContext:\n{user_context}" if user_context else ""
+        block = f"\n\nContext:\n{user_context}" if user_context else ""
         response = openai.chat.completions.create(
             model="gpt-3.5-turbo",
-            messages=[{"role": "user", "content": query + context_block}],
+            messages=[{"role": "user", "content": question + block}],
             temperature=0.7,
         )
         return response.choices[0].message.content.strip()
     except Exception as e:
-        return f"⚠️ GPT error:\n\n{str(e)}"
-
-# 🎯 For EvoRAG app UI
-def evo_chat_predict(history, query, options):
-    context = "\n".join(history[-6:]) if history else ""
-    evo_ans, evo_score, evo_reason, evo_ctx = get_evo_response(query, options, context)
-    return {
-        "answer": evo_ans,
-        "confidence": round(evo_score, 3),
-        "reasoning": evo_reason,
-        "context_used": evo_ctx
-    }
+        return f"⚠️ GPT error: {str(e)}"
 
-# 📊 Evo architecture stats
+# 📊 Evo architecture info
 def get_model_config():
     return {
         "num_layers": 6,
         "num_heads": 8,
         "ffn_dim": 1024,
         "memory_enabled": True,
-        "param_count": sum(p.numel() for p in model.parameters() if p.requires_grad) if model else "N/A"
+        "total_params": sum(p.numel() for p in model.parameters()) if model else "N/A"
     }
 
-# 💻 Hardware and system stats
+# 💻 System stats
 def get_system_stats():
-    gpu_info = torch.cuda.get_device_properties(0) if torch.cuda.is_available() else None
-    memory = psutil.virtual_memory()
+    gpu = torch.cuda.get_device_properties(0) if torch.cuda.is_available() else None
+    mem = psutil.virtual_memory()
 
     return {
         "device": "GPU" if torch.cuda.is_available() else "CPU",
        "cpu_usage_percent": psutil.cpu_percent(),
-        "memory_used_gb": round(memory.used / (1024 ** 3), 2),
-        "memory_total_gb": round(memory.total / (1024 ** 3), 2),
-        "gpu_name": gpu_info.name if gpu_info else "N/A",
-        "gpu_memory_total_gb": round(gpu_info.total_memory / (1024 ** 3), 2) if gpu_info else "N/A",
-        "gpu_memory_used_gb": round(torch.cuda.memory_allocated() / (1024 ** 3), 2) if gpu_info else "N/A",
+        "memory_used_gb": round(mem.used / (1024 ** 3), 2),
+        "memory_total_gb": round(mem.total / (1024 ** 3), 2),
+        "gpu_name": gpu.name if gpu else "N/A",
+        "gpu_memory_total_gb": round(gpu.total_memory / (1024 ** 3), 2) if gpu else "N/A",
+        "gpu_memory_used_gb": round(torch.cuda.memory_allocated() / (1024 ** 3), 2) if gpu else "N/A",
        "platform": platform.platform()
    }
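
For reviewers trying the renamed entry points locally, here is a minimal usage sketch of the new API (illustrative only, not part of this commit: the question and the two answer options are hypothetical, and it assumes inference.py is importable with MODEL_PATH pointing at a valid EvoTransformerV22 checkpoint and OPENAI_API_KEY set in the environment):

# Illustrative usage sketch — not part of this commit.
from inference import evo_infer, gpt_infer, get_model_config

question = "Which action is safer?"               # hypothetical query
option1 = "Leave the stove on overnight"          # hypothetical option 1
option2 = "Turn the stove off before sleeping"    # hypothetical option 2

# evo_infer now takes two explicit options instead of an options list,
# and returns (best answer, confidence, score trace, retrieved context)
best, confidence, trace, context = evo_infer(question, option1, option2)
print(best, round(confidence, 3))
print(trace)  # e.g. "<option1>: 0.412 vs <option2>: 0.731"

# architecture info, including the renamed "total_params" field
print(get_model_config())

# gpt_infer is the GPT-3.5 fallback; optional user_context is appended to the prompt
print(gpt_infer(question, user_context=context))

Note that evo_infer scores each option independently with a sigmoid rather than softmaxing across the pair, so the two scores need not sum to 1.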