Spaces:
Sleeping
Sleeping
Update inference.py
Browse files- inference.py +12 -10
inference.py
CHANGED
@@ -18,7 +18,7 @@ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
|
18 |
model = None
|
19 |
last_mod_time = 0
|
20 |
|
21 |
-
# π Reload model if
|
22 |
def load_model():
|
23 |
global model, last_mod_time
|
24 |
try:
|
@@ -34,18 +34,18 @@ def load_model():
|
|
34 |
model = None
|
35 |
return model
|
36 |
|
37 |
-
# 🧠 Evo logic
|
38 |
def get_evo_response(query, options, user_context=""):
|
39 |
model = load_model()
|
40 |
if model is None:
|
41 |
return "Error", 0.0, "Model failed to load", ""
|
42 |
|
43 |
-
#
|
44 |
context_texts = web_search(query) + ([user_context] if user_context else [])
|
45 |
context_str = "\n".join(context_texts)
|
46 |
input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]
|
47 |
|
48 |
-
#
|
49 |
scores = []
|
50 |
for pair in input_pairs:
|
51 |
encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
|
@@ -56,13 +56,13 @@ def get_evo_response(query, options, user_context=""):
|
|
56 |
|
57 |
best_idx = int(scores[1] > scores[0])
|
58 |
return (
|
59 |
-
options[best_idx], # ✅ Evo's answer
|
60 |
max(scores), # ✅ Confidence
|
61 |
f"{options[0]}: {scores[0]:.3f} vs {options[1]}: {scores[1]:.3f}", # ✅ Reasoning trace
|
62 |
-
context_str # ✅
|
63 |
)
|
64 |
|
65 |
-
# π GPT
|
66 |
def get_gpt_response(query, user_context=""):
|
67 |
try:
|
68 |
context_block = f"\n\nContext:\n{user_context}" if user_context else ""
|
@@ -75,7 +75,7 @@ def get_gpt_response(query, user_context=""):
|
|
75 |
except Exception as e:
|
76 |
return f"⚠️ GPT error:\n\n{str(e)}"
|
77 |
|
78 |
-
# 🎯
|
79 |
def evo_chat_predict(history, query, options):
|
80 |
context = "\n".join(history[-6:]) if history else ""
|
81 |
evo_ans, evo_score, evo_reason, evo_ctx = get_evo_response(query, options, context)
|
@@ -86,15 +86,17 @@ def evo_chat_predict(history, query, options):
|
|
86 |
"context_used": evo_ctx
|
87 |
}
|
88 |
|
89 |
-
# π
|
90 |
def get_model_config():
|
91 |
return {
|
92 |
"num_layers": 6,
|
93 |
"num_heads": 8,
|
94 |
"ffn_dim": 1024,
|
95 |
-
"memory_enabled": True
|
|
|
96 |
}
|
97 |
|
|
|
98 |
def get_system_stats():
|
99 |
gpu_info = torch.cuda.get_device_properties(0) if torch.cuda.is_available() else None
|
100 |
memory = psutil.virtual_memory()
|
|
|
18 |
model = None
|
19 |
last_mod_time = 0
|
20 |
|
21 |
+
# π Reload model if updated on disk
|
22 |
def load_model():
|
23 |
global model, last_mod_time
|
24 |
try:
|
|
|
34 |
model = None
|
35 |
return model
|
36 |
|
37 |
+
# 🧠 Evo inference logic
|
38 |
def get_evo_response(query, options, user_context=""):
|
39 |
model = load_model()
|
40 |
if model is None:
|
41 |
return "Error", 0.0, "Model failed to load", ""
|
42 |
|
43 |
+
# Context = web + user
|
44 |
context_texts = web_search(query) + ([user_context] if user_context else [])
|
45 |
context_str = "\n".join(context_texts)
|
46 |
input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]
|
47 |
|
48 |
+
# Score each option
|
49 |
scores = []
|
50 |
for pair in input_pairs:
|
51 |
encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
|
|
|
56 |
|
57 |
best_idx = int(scores[1] > scores[0])
|
58 |
return (
|
59 |
+
options[best_idx], # ✅ Evo's best answer
|
60 |
max(scores), # ✅ Confidence
|
61 |
f"{options[0]}: {scores[0]:.3f} vs {options[1]}: {scores[1]:.3f}", # ✅ Reasoning trace
|
62 |
+
context_str # ✅ Used context
|
63 |
)
|
64 |
|
65 |
+
# π GPT-3.5 response
|
66 |
def get_gpt_response(query, user_context=""):
|
67 |
try:
|
68 |
context_block = f"\n\nContext:\n{user_context}" if user_context else ""
|
|
|
75 |
except Exception as e:
|
76 |
return f"⚠️ GPT error:\n\n{str(e)}"
|
77 |
|
78 |
+
# 🎯 For EvoRAG app UI
|
79 |
def evo_chat_predict(history, query, options):
|
80 |
context = "\n".join(history[-6:]) if history else ""
|
81 |
evo_ans, evo_score, evo_reason, evo_ctx = get_evo_response(query, options, context)
|
|
|
86 |
"context_used": evo_ctx
|
87 |
}
|
88 |
|
89 |
+
# π Evo architecture stats
|
90 |
def get_model_config():
|
91 |
return {
|
92 |
"num_layers": 6,
|
93 |
"num_heads": 8,
|
94 |
"ffn_dim": 1024,
|
95 |
+
"memory_enabled": True,
|
96 |
+
"param_count": sum(p.numel() for p in model.parameters() if p.requires_grad) if model else "N/A"
|
97 |
}
|
98 |
|
99 |
+
# 💻 Hardware and system stats
|
100 |
def get_system_stats():
|
101 |
gpu_info = torch.cuda.get_device_properties(0) if torch.cuda.is_available() else None
|
102 |
memory = psutil.virtual_memory()
|