import os
import torch
from transformers import AutoTokenizer
from evo_model import EvoTransformerV22
from search_utils import web_search
import openai

# 🔐 Load OpenAI API key securely
openai.api_key = os.getenv("OPENAI_API_KEY")

# 🔁 Track model changes
MODEL_PATH = "evo_hellaswag.pt"
last_mod_time = 0
model = None
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# 📦 Load model with auto-reload if the file is updated
def load_model():
    global model, last_mod_time
    current_mod_time = os.path.getmtime(MODEL_PATH)
    if model is None or current_mod_time > last_mod_time:
        model = EvoTransformerV22()
        model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))
        model.eval()
        last_mod_time = current_mod_time
        print("🔁 Evo model reloaded.")
    return model
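
# NOTE (assumption): EvoTransformerV22 is expected to accept input_ids of shape
# (batch, seq_len) and return one suitability logit per sequence; the scoring
# loop in get_evo_response relies on that contract when it calls .item().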

# 🧠 Evo decision logic with confidence scores
def get_evo_response(query, options, user_context=""):
    model = load_model()

    # Retrieve RAG context + optional user input
    context_texts = web_search(query) + ([user_context] if user_context else [])
    context_str = "\n".join(context_texts)
    input_pairs = [f"{query} [SEP] {opt} [CTX] {context_str}" for opt in options]

    # Encode each option and score it independently
    scores = []
    for pair in input_pairs:
        encoded = tokenizer(pair, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
        with torch.no_grad():
            logits = model(encoded["input_ids"])
            score = torch.sigmoid(logits).item()
            scores.append(score)

    # Pick the highest-scoring option (works for any number of options,
    # not just two as the original hardcoded comparison assumed)
    best_idx = scores.index(max(scores))
    reasoning = " vs ".join(f"{opt}: {s:.3f}" for opt, s in zip(options, scores))
    return (
        options[best_idx],   # 🔹 Selected answer
        scores[best_idx],    # 🔹 Confidence score
        reasoning,           # 🔹 Reasoning trace
        context_str          # 🔹 Context used
    )

# 🤖 GPT-3.5 backup or comparison
def get_gpt_response(query, user_context=""):
    try:
        context_block = f"\n\nContext:\n{user_context}" if user_context else ""
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": query + context_block}],
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"⚠️ GPT error:\n\n{e}"

# ✅ Final callable interface
def infer(query, options, user_context=""):
    return get_evo_response(query, options, user_context)

# 🧠 Unified chat-style interface for EvoRAG
def evo_chat_predict(history, query, options):
    # Use the last few exchanges as context (up to 3 pairs)
    context = "\n".join(history[-6:]) if history else ""

    evo_ans, evo_score, evo_reason, evo_ctx = get_evo_response(query, options, context)

    return {
        "answer": evo_ans,
        "confidence": round(evo_score, 3),
        "reasoning": evo_reason,
        "context_used": evo_ctx
    }
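
# 🧪 Example usage — a minimal sketch, not part of the production interface.
# Assumes evo_hellaswag.pt is present locally, OPENAI_API_KEY is set, and
# search_utils.web_search returns a list of snippet strings; the query and
# options below are made-up placeholders.
if __name__ == "__main__":
    demo_query = "To loosen a stuck jar lid, you should"
    demo_options = [
        "run the lid under hot water for a few seconds",
        "put the jar in direct sunlight for a week",
    ]
    result = evo_chat_predict(history=[], query=demo_query, options=demo_options)
    print("Evo answer:", result["answer"], f"(confidence {result['confidence']})")
    print("Reasoning:", result["reasoning"])
    print("GPT-3.5 says:", get_gpt_response(demo_query))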