Update inference.py
inference.py  CHANGED  +34 -27
@@ -10,34 +10,40 @@ import time
 # Load OpenAI API Key securely
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
-# …
+# 📦 Constants
 MODEL_PATH = "evo_hellaswag.pt"
-last_mod_time = 0
-model = None
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+model = None
+last_mod_time = 0
 
-# …
+# Reload model if changed on disk
 def load_model():
     global model, last_mod_time
-    …
+    try:
+        current_mod_time = os.path.getmtime(MODEL_PATH)
+        if model is None or current_mod_time > last_mod_time:
+            model = EvoTransformerV22()
+            model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))
+            model.eval()
+            last_mod_time = current_mod_time
+            print("Evo model reloaded.")
+    except Exception as e:
+        print(f"❌ Error loading Evo model: {e}")
+        model = None
     return model
 
-# 🧠 Evo
+# 🧠 Evo logic
 def get_evo_response(query, options, user_context=""):
     model = load_model()
+    if model is None:
+        return "Error", 0.0, "Model failed to load", ""
 
-    # Retrieve
+    # Retrieve web search + optional user context
     context_texts = web_search(query) + ([user_context] if user_context else [])
     context_str = "\n".join(context_texts)
     input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]
 
-    # Encode
+    # Encode and score each option
     scores = []
     for pair in input_pairs:
         encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
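Note: the new load_model() implements a simple mtime-based hot reload, re-reading the checkpoint only when the file on disk is newer than the cached copy. A minimal standalone sketch of the same pattern (the path and loader below are illustrative stand-ins, not from this repo):

import os

WEIGHTS_PATH = "weights.bin"  # illustrative stand-in for evo_hellaswag.pt
_cached = None
_cached_mtime = 0.0

def load_cached():
    global _cached, _cached_mtime
    mtime = os.path.getmtime(WEIGHTS_PATH)  # raises OSError if the file is missing
    if _cached is None or mtime > _cached_mtime:
        with open(WEIGHTS_PATH, "rb") as f:  # stand-in for torch.load(...)
            _cached = f.read()
        _cached_mtime = mtime
    return _cached

The committed version additionally wraps the whole check in try/except, so a missing or corrupt checkpoint degrades to model = None instead of raising into the caller.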
@@ -48,13 +54,13 @@ def get_evo_response(query, options, user_context=""):
 
     best_idx = int(scores[1] > scores[0])
     return (
-        options[best_idx],
-        max(scores),
-        f"{options[0]}: {scores[0]:.3f} vs {options[1]}: {scores[1]:.3f}",  # …
-        context_str
+        options[best_idx],  # ✅ Evo's answer
+        max(scores),  # ✅ Confidence
+        f"{options[0]}: {scores[0]:.3f} vs {options[1]}: {scores[1]:.3f}",  # ✅ Reasoning trace
+        context_str  # ✅ Context used
     )
 
-# …
+# GPT backup response
 def get_gpt_response(query, user_context=""):
     try:
         context_block = f"\n\nContext:\n{user_context}" if user_context else ""
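Note: best_idx = int(scores[1] > scores[0]) assumes exactly two options, consistent with the two-choice HellaSwag-style setup used here. If the option list ever grows beyond two, a drop-in N-way equivalent (a sketch, not part of this commit):

best_idx = max(range(len(scores)), key=lambda i: scores[i])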
@@ -67,17 +73,10 @@ def get_gpt_response(query, user_context=""):
     except Exception as e:
         return f"⚠️ GPT error:\n\n{str(e)}"
 
-# …
-def infer(query, options, user_context=""):
-    return get_evo_response(query, options, user_context)
-
-# 🧠 Unified chat-style interface for EvoRAG
+# 🎯 Used by app.py to display Evo live output
 def evo_chat_predict(history, query, options):
-    # Use the last few exchanges as context (up to 3 pairs)
     context = "\n".join(history[-6:]) if history else ""
-
     evo_ans, evo_score, evo_reason, evo_ctx = get_evo_response(query, options, context)
-
     return {
         "answer": evo_ans,
         "confidence": round(evo_score, 3),
@@ -85,3 +84,11 @@ def evo_chat_predict(history, query, options):
         "context_used": evo_ctx
     }
 
+# Returns current Evo architecture stats (for UI display)
+def get_model_config():
+    return {
+        "num_layers": 6,
+        "num_heads": 8,
+        "ffn_dim": 1024,
+        "memory_enabled": True
+    }
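Note: evo_chat_predict() treats history as a flat list of alternating user/model turns and keeps only the last six entries (three exchanges) as retrieval context. A hypothetical call from app.py (the values are illustrative only):

history = ["User: The glass fell off the table.", "Evo: It likely broke."]
result = evo_chat_predict(history, "What happens next?", ["It shatters.", "It floats."])
print(result["answer"], result["confidence"])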
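Note: get_model_config() returns hard-coded stats, so it can drift from the actual checkpoint. If EvoTransformerV22 exposes its hyperparameters as attributes, the UI could read them from the live model instead; a sketch under that assumption (the attribute names are guesses, not the real EvoTransformerV22 API):

def get_model_config_live(model):
    return {
        "num_layers": getattr(model, "num_layers", 6),
        "num_heads": getattr(model, "num_heads", 8),
        "ffn_dim": getattr(model, "ffn_dim", 1024),
        "memory_enabled": getattr(model, "memory_enabled", True),
    }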