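"""Inference utilities for the Evo demo Space.

Handles Evo model loading/reloading from disk, Evo and GPT response
generation, and the model/system stats that app.py shows in the live UI.
"""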
import os
import platform

import openai
import psutil
import torch
from transformers import AutoTokenizer

from evo_model import EvoTransformerV22
from search_utils import web_search
# Load the OpenAI API key securely from the environment
openai.api_key = os.getenv("OPENAI_API_KEY")
# Constants and shared model state
MODEL_PATH = "evo_hellaswag.pt"
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = None
last_mod_time = 0
# Reload the model if the checkpoint changed on disk
def load_model():
    global model, last_mod_time
    try:
        current_mod_time = os.path.getmtime(MODEL_PATH)
        if model is None or current_mod_time > last_mod_time:
            model = EvoTransformerV22()
            model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))
            model.eval()
            last_mod_time = current_mod_time
            print("Evo model reloaded.")
    except Exception as e:
        print(f"Error loading Evo model: {e}")
        model = None
    return model
# Evo logic
def get_evo_response(query, options, user_context=""):
    model = load_model()
    if model is None:
        return "Error", 0.0, "Model failed to load", ""

    # Retrieve web search results plus optional user context
    context_texts = web_search(query) + ([user_context] if user_context else [])
    context_str = "\n".join(context_texts)
    input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]

    # Encode and score each option
    scores = []
    for pair in input_pairs:
        encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
        with torch.no_grad():
            logits = model(encoded["input_ids"])
            score = torch.sigmoid(logits).item()
        scores.append(score)

    # Pick the highest-scoring option (handles any number of options,
    # not just two)
    best_idx = max(range(len(scores)), key=scores.__getitem__)
    reasoning = " vs ".join(f"{opt}: {s:.3f}" for opt, s in zip(options, scores))
    return (
        options[best_idx],   # Evo's answer
        scores[best_idx],    # Confidence
        reasoning,           # Reasoning trace
        context_str,         # Context used
    )
# GPT backup response
def get_gpt_response(query, user_context=""):
    try:
        context_block = f"\n\nContext:\n{user_context}" if user_context else ""
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": query + context_block}],
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"GPT error:\n\n{e}"
# Used by app.py to display Evo's live output
def evo_chat_predict(history, query, options):
    # Use the last six turns of chat history as additional context
    context = "\n".join(history[-6:]) if history else ""
    evo_ans, evo_score, evo_reason, evo_ctx = get_evo_response(query, options, context)
    return {
        "answer": evo_ans,
        "confidence": round(evo_score, 3),
        "reasoning": evo_reason,
        "context_used": evo_ctx,
    }
# Returns current Evo architecture stats (for UI display)
def get_model_config():
    return {
        "num_layers": 6,
        "num_heads": 8,
        "ffn_dim": 1024,
        "memory_enabled": True,
    }
# Returns current host and GPU utilization stats (for UI display)
def get_system_stats():
    gpu_info = torch.cuda.get_device_properties(0) if torch.cuda.is_available() else None
    memory = psutil.virtual_memory()
    return {
        "device": "GPU" if torch.cuda.is_available() else "CPU",
        "cpu_usage_percent": psutil.cpu_percent(),
        "memory_used_gb": round(memory.used / (1024 ** 3), 2),
        "memory_total_gb": round(memory.total / (1024 ** 3), 2),
        "gpu_name": gpu_info.name if gpu_info else "N/A",
        "gpu_memory_total_gb": round(gpu_info.total_memory / (1024 ** 3), 2) if gpu_info else "N/A",
        "gpu_memory_used_gb": round(torch.cuda.memory_allocated() / (1024 ** 3), 2) if gpu_info else "N/A",
        "platform": platform.platform(),
    }
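

# Minimal smoke test, assuming evo_hellaswag.pt exists locally and
# search_utils.web_search is reachable. The query and options below are
# made-up illustrations, not part of the app itself.
if __name__ == "__main__":
    result = evo_chat_predict(
        history=[],
        query="The chef cracked an egg into the pan. What happens next?",
        options=["The egg begins to fry.", "The pan turns into gold."],
    )
    print(result)
    print(get_model_config())
    print(get_system_stats())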