HemanM committed
Commit d023240 · verified · 1 Parent(s): ec9b863

Update inference.py

Files changed (1)
  1. inference.py +12 -10
inference.py CHANGED
@@ -18,7 +18,7 @@ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 model = None
 last_mod_time = 0
 
-# 🔁 Reload model if changed on disk
+# 🔁 Reload model if updated on disk
 def load_model():
     global model, last_mod_time
     try:
@@ -34,18 +34,18 @@ def load_model():
         model = None
     return model
 
-# 🧠 Evo logic
+# 🧠 Evo inference logic
 def get_evo_response(query, options, user_context=""):
     model = load_model()
     if model is None:
         return "Error", 0.0, "Model failed to load", ""
 
-    # Retrieve web search + optional user context
+    # Context = web + user
     context_texts = web_search(query) + ([user_context] if user_context else [])
     context_str = "\n".join(context_texts)
     input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]
 
-    # Encode and score each option
+    # Score each option
     scores = []
     for pair in input_pairs:
         encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
@@ -56,13 +56,13 @@ def get_evo_response(query, options, user_context=""):
 
     best_idx = int(scores[1] > scores[0])
     return (
-        options[best_idx],  # ✅ Evo's answer
+        options[best_idx],  # ✅ Evo's best answer
         max(scores),  # ✅ Confidence
         f"{options[0]}: {scores[0]:.3f} vs {options[1]}: {scores[1]:.3f}",  # ✅ Reasoning trace
-        context_str  # ✅ Context used
+        context_str  # ✅ Used context
     )
 
-# 🔄 GPT backup response
+# 🔄 GPT-3.5 response
 def get_gpt_response(query, user_context=""):
     try:
         context_block = f"\n\nContext:\n{user_context}" if user_context else ""
@@ -75,7 +75,7 @@ def get_gpt_response(query, user_context=""):
     except Exception as e:
         return f"⚠️ GPT error:\n\n{str(e)}"
 
-# 🎯 Used by app.py to display Evo live output
+# 🎯 For EvoRAG app UI
 def evo_chat_predict(history, query, options):
     context = "\n".join(history[-6:]) if history else ""
     evo_ans, evo_score, evo_reason, evo_ctx = get_evo_response(query, options, context)
@@ -86,15 +86,17 @@ def evo_chat_predict(history, query, options):
         "context_used": evo_ctx
     }
 
-# 📊 Returns current Evo architecture stats (for UI display)
+# 📊 Evo architecture stats
 def get_model_config():
     return {
         "num_layers": 6,
         "num_heads": 8,
         "ffn_dim": 1024,
-        "memory_enabled": True
+        "memory_enabled": True,
+        "param_count": sum(p.numel() for p in model.parameters() if p.requires_grad) if model else "N/A"
     }
 
+# 💻 Hardware and system stats
 def get_system_stats():
     gpu_info = torch.cuda.get_device_properties(0) if torch.cuda.is_available() else None
     memory = psutil.virtual_memory()
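Note: the hunks above only show the edges of load_model(), so for orientation, here is a minimal sketch of the mtime-based reload pattern its comment describes. The checkpoint path and the torch.load call are assumptions, not code visible in this commit.

# Hedged sketch of a "reload model if updated on disk" guard, matching the
# comment and the globals visible in the diff. MODEL_PATH is hypothetical.
import os
import torch

MODEL_PATH = "evo_model.pt"  # assumed checkpoint location
model = None
last_mod_time = 0

def load_model():
    global model, last_mod_time
    try:
        mod_time = os.path.getmtime(MODEL_PATH)
        if model is None or mod_time > last_mod_time:
            # Reload only when the file is newer than the cached copy
            model = torch.load(MODEL_PATH, map_location="cpu")
            last_mod_time = mod_time
    except Exception:
        model = None
    return model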
 
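Usage note: get_evo_response scores each "{query} [SEP] {option} [CTX] {context}" pair and returns the higher-scoring option. A caller might look like the hedged sketch below; only the "context_used" key is visible in this diff, so everything else here is illustrative.

# Hypothetical caller, mirroring how app.py might use evo_chat_predict.
from inference import evo_chat_predict  # assumes inference.py is importable

history = ["Q: What is 2 + 2?", "A: 4"]  # recent turns, joined into context
# best_idx = int(scores[1] > scores[0]) compares exactly two scores,
# so two options are assumed.
result = evo_chat_predict(history, "Is Mars a planet?", ["Yes", "No"])
print(result["context_used"])  # the only key this diff confirms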
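The one functional change is the new "param_count" entry in get_model_config(). In isolation it is a trainable-parameter count, guarded with `if model else "N/A"` because the module-level model stays None until load_model() succeeds. A self-contained sketch of the expression, with nn.Linear standing in for the Evo model:

# The new "param_count" expression in isolation: count trainable parameters.
import torch.nn as nn

model = nn.Linear(8, 2)  # stand-in for the loaded Evo model (assumption)
param_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(param_count)  # 8*2 weights + 2 biases = 18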