Spaces:
Sleeping
Sleeping
Update inference.py
Browse files- inference.py +32 -32
inference.py
CHANGED
@@ -1,54 +1,54 @@
|
|
1 |
-
import os
|
2 |
import torch
|
3 |
-
|
|
|
4 |
from transformers import AutoTokenizer
|
|
|
5 |
from rag_utils import extract_text_from_file
|
6 |
from search_utils import web_search
|
7 |
|
8 |
-
# Load Evo model
|
9 |
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
10 |
-
model =
|
11 |
model.load_state_dict(torch.load("evo_hellaswag.pt", map_location="cpu"))
|
12 |
model.eval()
|
13 |
|
14 |
-
def
|
15 |
-
|
16 |
-
if
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
|
25 |
-
|
|
|
|
|
|
|
26 |
with torch.no_grad():
|
27 |
-
logits = model(
|
28 |
-
|
29 |
-
|
|
|
30 |
|
31 |
-
suggestion =
|
32 |
-
|
33 |
-
|
34 |
-
f"Context used:\n{full_context}"
|
35 |
-
)
|
36 |
-
return suggestion, reasoning
|
37 |
|
38 |
-
def get_gpt_response(
|
39 |
-
import openai
|
40 |
openai.api_key = os.getenv("OPENAI_API_KEY", "")
|
41 |
-
|
|
|
42 |
|
43 |
try:
|
44 |
response = openai.ChatCompletion.create(
|
45 |
model="gpt-3.5-turbo",
|
46 |
messages=[
|
47 |
-
{"role": "system", "content": "You are a helpful
|
48 |
-
{"role": "user", "content":
|
49 |
-
]
|
50 |
-
max_tokens=250
|
51 |
)
|
52 |
-
return response[
|
53 |
except Exception as e:
|
54 |
return f"⚠️ GPT error: {str(e)}"
|
|
|
|
|
1 |
import torch
|
2 |
+
import openai
|
3 |
+
import os
|
4 |
from transformers import AutoTokenizer
|
5 |
+
from evo_model import EvoTransformerV22
|
6 |
from rag_utils import extract_text_from_file
|
7 |
from search_utils import web_search
|
8 |
|
|
|
9 |
# Module-level setup: runs once at import time so every request reuses the
# same tokenizer and model instance.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = EvoTransformerV22()
# NOTE(review): torch.load unpickles the checkpoint, so "evo_hellaswag.pt"
# must come from a trusted source. Consider weights_only=True if the
# installed torch version supports it -- confirm before changing.
model.load_state_dict(torch.load("evo_hellaswag.pt", map_location="cpu"))
# Switch to inference mode (module.eval()) before serving predictions.
model.eval()
|
13 |
|
14 |
+
def format_input(question, options, context, web_results):
    """Build the plain-text prompt that is fed to the tokenizer.

    The prompt is the question, followed by an optional ``Context:`` section,
    an optional ``Web Search Results:`` section (one result per line), and a
    1-based numbered ``Options:`` list. Leading and trailing whitespace of
    the assembled prompt is stripped.

    Args:
        question: The question text.
        options: Iterable of candidate answers; numbered starting at 1.
        context: Extra context text; the section is omitted when falsy.
        web_results: Iterable of result strings; the section is omitted
            when falsy.

    Returns:
        The assembled prompt string.
    """
    # Collect the pieces and join once instead of repeated `+=`.
    parts = [f"{question}\n"]
    if context:
        parts.append(f"\nContext:\n{context}\n")
    if web_results:
        # No trailing newline here: the "\nOptions:" header below supplies
        # the line break, which keeps the prompt layout unchanged.
        parts.append("\nWeb Search Results:\n" + "\n".join(web_results))
    parts.append("\nOptions:\n")
    parts.extend(f"{number}. {opt}\n" for number, opt in enumerate(options, start=1))
    return "".join(parts).strip()
|
24 |
|
25 |
+
def get_evo_response(question, context, options, enable_search=True):
    """Return the option the Evo model scores highest, plus a short rationale.

    The question, context, optional web-search results and options are
    formatted into one prompt, tokenized (truncated to 256 tokens) and only
    the ``input_ids`` are passed to the module-level ``model``.

    Args:
        question: The question text; also used as the web-search query.
        context: Extra context text included in the prompt (may be empty).
        options: Sequence of candidate answers.
        enable_search: When true, ``web_search(question)`` results are added
            to the prompt and echoed in the rationale.

    Returns:
        A ``(suggestion, evo_reasoning)`` tuple. ``suggestion`` is the chosen
        option, or ``"N/A"`` when the predicted index is outside ``options``.
        ``evo_reasoning`` is a Markdown string with the suggestion, its
        confidence and the web results that were used.
    """
    # `or []` keeps the later "\n".join from failing if the search helper
    # returns None instead of an empty list.
    web_results = (web_search(question) or []) if enable_search else []
    input_text = format_input(question, options, context, web_results)
    encoded = tokenizer(
        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=256,
    )
    with torch.no_grad():
        logits = model(encoded["input_ids"])
        # NOTE(review): assumes logits are (1, num_classes) -- confirm against
        # EvoTransformerV22. reshape(-1) always yields a 1-D tensor, whereas
        # squeeze() would collapse a (1, 1) output to 0-d and make the
        # indexing below raise.
        probs = torch.softmax(logits, dim=1).reshape(-1)
        pred_index = torch.argmax(probs).item()
        confidence = probs[pred_index].item()

    # The model head may expose more classes than there are options.
    suggestion = options[pred_index] if pred_index < len(options) else "N/A"
    evo_reasoning = (
        f"Evo suggests: **{suggestion}** (Confidence: {confidence:.2f})\n\nContext used:\n"
        + "\n".join(web_results)
    )
    return suggestion, evo_reasoning
|
|
|
|
|
|
|
38 |
|
39 |
+
def get_gpt_response(question, context, options):
    """Ask GPT-3.5 which option makes the most sense and why.

    Args:
        question: The question text.
        context: Context text inserted under the ``Context:`` heading.
        options: Iterable of candidate answers; numbered starting at 1.

    Returns:
        The assistant's reply text, or a string starting with
        ``"⚠️ GPT error: "`` describing the exception if the API call fails.
    """
    # The key is read on every call; an unset variable becomes "".
    openai.api_key = os.getenv("OPENAI_API_KEY", "")
    formatted_options = "\n".join(
        f"{number}. {opt}" for number, opt in enumerate(options, start=1)
    )
    prompt = f"Question: {question}\n\nContext:\n{context}\n\nOptions:\n{formatted_options}\n\nWhich option makes the most sense and why?"

    try:
        # NOTE(review): ChatCompletion is the pre-1.0 openai interface --
        # confirm the pinned openai version before upgrading the package.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful reasoning assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        # Deliberate best-effort boundary: report the failure to the caller
        # as text instead of raising.
        return f"⚠️ GPT error: {e}"
|