# Hugging Face Space snapshot — LamiaYT, commit 57b9551 ("fixing ver3"), 8.9 kB
import os
import gradio as gr
import requests
import json
import re
import numexpr
import pandas as pd
from pdfminer.high_level import extract_text
from bs4 import BeautifulSoup
from typing import List, Dict, Optional, Tuple
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import torch
import time
import gc
# --- Load Environment Variables ---
# Pulls secrets from a local .env file into the process environment.
load_dotenv()
SERPER_API_KEY = os.getenv("SERPER_API_KEY")  # optional; web_search degrades gracefully without it

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"  # GAIA course scoring endpoint
MAX_STEPS = 6  # max reason/tool-call iterations per question
MAX_TOKENS = 256  # max new tokens per model generation
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
TIMEOUT_PER_QUESTION = 45  # seconds allotted to answer a single question
MAX_RESULT_LENGTH = 500  # characters kept from any tool result

# --- Fixed Model Loading ---
# Loaded once at module import. float32 + low_cpu_mem_usage keeps this
# runnable on a CPU Space; device_map="auto" places layers automatically.
print("Loading model with fixed configuration...")
start_time = time.time()
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.float32,
    device_map="auto",
    low_cpu_mem_usage=True
)
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    use_fast=True,
    trust_remote_code=True
)
# Phi-3's tokenizer ships without a pad token; reuse EOS so generate() can pad.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
print(f"Model loaded in {time.time() - start_time:.2f} seconds")
# --- Tools Implementation ---
def web_search(query: str) -> str:
    """Search the web via the Serper.dev API.

    Args:
        query: Free-text search query.

    Returns:
        Up to three "title: snippet" lines truncated to MAX_RESULT_LENGTH,
        or a human-readable status/error string. Never raises.
    """
    if not SERPER_API_KEY:
        return "Search API key not configured"
    try:
        response = requests.post(
            'https://google.serper.dev/search',
            headers={'X-API-KEY': SERPER_API_KEY},
            json={'q': query, 'num': 3},
            timeout=10
        )
        # Fail fast on HTTP errors instead of json-decoding an error page.
        response.raise_for_status()
        results = response.json()
        organic = results.get('organic', [])
        if organic:
            # .get() guards against entries missing 'title' or 'snippet'.
            lines = [f"{r.get('title', '')}: {r.get('snippet', '')}" for r in organic[:3]]
            return "\n".join(lines)[:MAX_RESULT_LENGTH]
        return "No search results found"
    except Exception as e:
        return f"Search error: {str(e)}"
def calculator(expression: str) -> str:
    """Safely evaluate an arithmetic expression with numexpr.

    Args:
        expression: Arithmetic text, e.g. "2 * (3 + 4)". Human notation is
            normalized: '^' is treated as exponentiation and ',' thousands
            separators are dropped (numexpr rejects both as written).

    Returns:
        The evaluated result as a string, or an error message. Never raises.
    """
    try:
        # Whitelist arithmetic characters only, so no names/calls reach numexpr.
        expression = re.sub(r'[^\d+\-*/().^%,\s]', '', expression)
        # Normalize: "1,000" -> "1000", "2^3" -> "2**3".
        expression = expression.replace(',', '').replace('^', '**')
        # .strip() so a whitespace-only expression is rejected here, not by numexpr.
        if not expression.strip():
            return "Invalid empty expression"
        return str(numexpr.evaluate(expression))
    except Exception as e:
        return f"Calculation error: {str(e)}"
def read_webpage(url: str) -> str:
    """Fetch a URL and return its visible text content.

    Args:
        url: Absolute http(s) URL.

    Returns:
        Page text with script/style/nav/footer removed and runs of blank
        lines collapsed, truncated to MAX_RESULT_LENGTH; or an error string.
        Never raises.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, timeout=10, headers=headers)
        # Don't scrape 4xx/5xx error pages as if they were content.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Strip non-content elements before extracting text.
        for element in soup(['script', 'style', 'nav', 'footer']):
            element.decompose()
        text = soup.get_text(separator='\n', strip=True)
        return re.sub(r'\n{3,}', '\n\n', text)[:MAX_RESULT_LENGTH]
    except Exception as e:
        return f"Webpage error: {str(e)}"
# Registry mapping tool names (as emitted in the model's JSON tool calls)
# to their implementations; dispatched by GAIA_Agent._use_tool.
TOOLS = {
    "web_search": web_search,
    "calculator": calculator,
    "read_webpage": read_webpage
}
# --- Fixed GAIA Agent ---
class GAIA_Agent:
    """ReAct-style agent: alternates model generations and tool calls until
    the model emits a "Final Answer:" line, or MAX_STEPS / TIMEOUT_PER_QUESTION
    is exhausted."""

    def __init__(self):
        self.tools = TOOLS
        self.system_prompt = """You are an advanced GAIA problem solver. Follow these steps:
1. Analyze the question
2. Choose the best tool
3. Process results
4. Provide final answer
Tools:
- web_search: For general knowledge
- calculator: For math
- read_webpage: For web content
Tool format: ```json
{"tool": "tool_name", "args": {"arg1": value}}```
Always end with: Final Answer: [answer]"""

    def __call__(self, question: str) -> str:
        """Answer one question; returns the final answer or an error string.

        Never raises: timeouts, step exhaustion, and internal errors all
        come back as plain strings.
        """
        start_time = time.time()
        history = [f"Question: {question}"]
        try:
            for step in range(MAX_STEPS):
                if time.time() - start_time > TIMEOUT_PER_QUESTION:
                    return "Timeout: Processing took too long"
                prompt = self._build_prompt(history)
                response = self._call_model(prompt)
                if "Final Answer:" in response:
                    return response.split("Final Answer:")[-1].strip()[:500]
                tool_call = self._parse_tool_call(response)
                if tool_call:
                    tool_name, args = tool_call
                    observation = self._use_tool(tool_name, args)
                    history.append(f"Tool: {tool_name}")
                    history.append(f"Result: {observation[:300]}...")
                else:
                    # No tool call and no final answer: keep the thought as context.
                    history.append(f"Thought: {response}")
                gc.collect()  # keep memory bounded on small CPU Spaces
            return "Maximum steps reached"
        except Exception as e:
            return f"Error: {str(e)}"

    def _build_prompt(self, history: List[str]) -> str:
        """Render the conversation history in Phi-3 chat format
        (<|system|> / <|user|> / <|assistant|> turns)."""
        return f"<|system|>\n{self.system_prompt}<|end|>\n<|user|>\n" + "\n".join(history) + "<|end|>\n<|assistant|>"

    def _call_model(self, prompt: str) -> str:
        """Generate a completion and return only the assistant's turn."""
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=3072,  # leave room for MAX_TOKENS within Phi-3's 4k context
            padding=False
        )
        outputs = model.generate(
            inputs.input_ids,
            max_new_tokens=MAX_TOKENS,
            temperature=0.3,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
            attention_mask=inputs.attention_mask
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip()

    def _parse_tool_call(self, text: str) -> Optional[Tuple[str, Dict]]:
        """Extract a ```json {...}``` tool call from model output.

        Returns:
            (tool_name, args) when a well-formed call is found, else None.
        """
        try:
            json_match = re.search(r'```json\s*({.+?})\s*```', text, re.DOTALL)
            if json_match:
                tool_call = json.loads(json_match.group(1))
                if "tool" in tool_call and "args" in tool_call:
                    return tool_call["tool"], tool_call["args"]
        except Exception:  # was a bare except: don't swallow KeyboardInterrupt/SystemExit
            return None
        return None

    def _use_tool(self, tool_name: str, args: Dict) -> str:
        """Dispatch to a registered tool; returns the (truncated) result or
        an error string. Never raises."""
        if tool_name not in self.tools:
            return f"Unknown tool: {tool_name}"
        try:
            # Recover a URL when the model put it under the wrong arg key.
            if tool_name == "read_webpage" and "url" not in args:
                url_match = re.search(r'https?://[^\s]+', str(args))
                if url_match:  # guard: original called .group() on a possible None
                    args = {"url": url_match.group()}
            return str(self.tools[tool_name](**args))[:MAX_RESULT_LENGTH]
        except Exception as e:
            return f"Tool error: {str(e)}"
# --- Evaluation Runner ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all GAIA questions, answer each with GAIA_Agent, submit results.

    Args:
        profile: Gradio OAuth profile of the logged-in user, or None.

    Returns:
        (status_message, results_dataframe_or_None)
    """
    if not profile:
        return "Please login first", None
    agent = GAIA_Agent()
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"
    try:
        response = requests.get(questions_url, timeout=15)
        # Fail fast on HTTP errors instead of json-decoding an error page.
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Failed to get questions: {str(e)}", None
    results = []
    answers = []
    for i, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue  # skip malformed entries
        print(f"Processing question {i+1}/{len(questions_data)}")
        answer = agent(question)
        answers.append({"task_id": task_id, "submitted_answer": answer})
        results.append({
            "Task ID": task_id,
            "Question": question[:100] + "..." if len(question) > 100 else question,
            "Answer": answer[:100] + "..." if len(answer) > 100 else answer
        })
    if not answers:
        # Nothing valid to submit (empty or malformed question list).
        return "No answerable questions were returned by the server", pd.DataFrame(results)
    submission = {
        "username": profile.username,
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
        "answers": answers
    }
    try:
        response = requests.post(submit_url, json=submission, timeout=30)
        response.raise_for_status()
        result = response.json()
        return f"Submitted! Score: {result.get('score', 'N/A')}", pd.DataFrame(results)
    except Exception as e:
        return f"Submission failed: {str(e)}", pd.DataFrame(results)
# --- Gradio Interface ---
# --- Gradio Interface ---
with gr.Blocks(title="Fixed GAIA Agent") as demo:
    gr.Markdown("## 🛠️ Fixed GAIA Agent")
    gr.Markdown("Resolved the 'DynamicCache' error with improved configuration")
    with gr.Row():
        gr.LoginButton()  # provides the gr.OAuthProfile consumed by run_and_submit_all
        run_btn = gr.Button("Run Evaluation", variant="primary")
    output_status = gr.Textbox(label="Status")
    results_table = gr.DataFrame(label="Results")
    # No explicit inputs: Gradio injects the OAuth profile from the login session.
    run_btn.click(
        run_and_submit_all,
        outputs=[output_status, results_table]
    )

if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard bind address/port for Hugging Face Spaces.
    demo.launch(server_name="0.0.0.0", server_port=7860)