import os

import torch
from evo_model import EvoTransformer
from transformers import AutoTokenizer
from rag_utils import extract_text_from_file
from search_utils import web_search

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


def load_model(model_path="evo_hellaswag.pt", device=None):
    """Load the EvoTransformer checkpoint and move it to the target device."""
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    model = EvoTransformer()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    return model, device


evo_model, device = load_model()


def get_evo_response(query, file=None, enable_search=True):
    """Score two candidate options for `query` with EvoTransformer.

    Optional context is gathered from an uploaded file and/or a web search;
    the option with the higher softmax score is returned.
    """
    context = ""
    if file:
        try:
            # Cap file context at 800 characters to keep the prompt short.
            context += extract_text_from_file(file)[:800]
        except Exception:
            # Skip unreadable files rather than failing the whole request.
            pass
    if enable_search:
        search_snippets = web_search(query)
        context += "\n".join(search_snippets)

    combined_prompt = f"{query}\nContext:\n{context}"
    inputs = [
        f"{combined_prompt} Option 1:",
        f"{combined_prompt} Option 2:",
    ]
    encoded = tokenizer(
        inputs, padding=True, truncation=True, return_tensors="pt"
    ).to(device)

    with torch.no_grad():
        # One logit per candidate; drop the trailing singleton dimension.
        outputs = evo_model(encoded["input_ids"]).squeeze(-1)

    probs = torch.softmax(outputs, dim=0)
    best = torch.argmax(probs).item()
    return (
        f"Option {best + 1} with {probs[best].item():.2%} confidence."
        f"\n\nReasoning:\n{inputs[best]}"
    )


def get_gpt_response(query, context=""):
    """Get a baseline recommendation from GPT-3.5 via the legacy (<1.0) openai SDK."""
    import openai

    openai.api_key = os.getenv("OPENAI_API_KEY")
    prompt = f"{query}\nContext:\n{context}\nGive a thoughtful recommendation with reasons."
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error: {str(e)}"
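
# Minimal usage sketch, assuming the checkpoint "evo_hellaswag.pt" and the
# helper modules imported above are present. The query text is illustrative
# only, and the GPT call requires OPENAI_API_KEY to be set in the environment.
if __name__ == "__main__":
    query = "Should I buy a mechanical or a membrane keyboard?"

    # Run the Evo ranker offline: no file upload, web search disabled.
    print(get_evo_response(query, file=None, enable_search=False))

    # Optionally compare against the GPT-3.5 baseline.
    print(get_gpt_response(query))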