# GaiaAgent / app.py
# kshitijthakkar's picture
# Update app.py
# c6929d8 verified
import os
import subprocess
import sys
import traceback  # Used for detailed error logging

import gradio as gr
import pandas as pd
import requests
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    OpenAIServerModel,
)
class PythonREPLTool:
    """Run a snippet of Python source in a separate interpreter process.

    The snippet is handed to a child interpreter via ``-c``. Whatever it
    prints to stdout is returned on success; when it exits with a non-zero
    status its stderr is returned instead, prefixed with ``Error:``.

    NOTE(review): this class exposes ``name``/``description``/``run`` but does
    not derive from ``smolagents.Tool``; confirm the installed agent framework
    accepts it as a tool.
    """

    name = "python_repl"
    description = "Runs Python code and returns the output or error."

    def __init__(self, timeout=10):
        """Store the per-run time limit.

        Args:
            timeout: Maximum number of seconds a snippet may run before the
                run is abandoned.
        """
        self.timeout = timeout

    def run(self, code: str) -> str:
        """Execute *code* and return its captured output.

        Args:
            code: Python source to execute.

        Returns:
            The stripped stdout when the child exits with status 0;
            otherwise ``Error:`` followed by a newline and the stripped
            stderr. Returns ``"Execution timed out."`` when the time limit
            is exceeded.
        """
        try:
            result = subprocess.run(
                # Use the interpreter that runs this app so the snippet sees
                # the same environment and no "python3" on PATH is required.
                [sys.executable, "-c", code],
                # Without capture_output/text, stdout and stderr are None and
                # the .strip() calls below would raise AttributeError.
                capture_output=True,
                text=True,
                timeout=self.timeout,
            )
        except subprocess.TimeoutExpired:
            return "Execution timed out."
        if result.returncode == 0:
            return result.stdout.strip()
        return f"Error:\n{result.stderr.strip()}"
# --- Constants ---
# Base URL of the scoring service; the "/questions" and "/submit" endpoint
# paths are appended to it when fetching questions and submitting answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Agent Definition ---
class GaiaAgent:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    The agent is driven by an ``OpenAIServerModel`` and is given a
    ``DuckDuckGoSearchTool`` and a ``PythonREPLTool``. Instances are callable:
    ``agent(question)`` returns the answer, or a fixed error string if the
    run fails.
    """

    def __init__(self, openai_key: str):
        """Build the model, the tools and the underlying ``CodeAgent``.

        Args:
            openai_key: API key passed to ``OpenAIServerModel``.
        """
        self.openai_key = openai_key
        # 1) Initialize the LLM-backed model
        # NOTE(review): confirm that the installed smolagents version's
        # OpenAIServerModel accepts a ``system_prompt`` keyword; if it is
        # forwarded to the completion request instead, the prompt needs to be
        # supplied through the agent rather than the model.
        self.model = OpenAIServerModel(
            model_id="gpt-4",  # or "gpt-3.5-turbo" if you prefer
            api_key=self.openai_key,
            system_prompt=(
                "You are a meticulous AI agent. "
                "Always think in Python code using the available tools. "
                "Never answer without executing or checking with a tool. "
                "Use DuckDuckGoSearchTool for factual lookups. "
                "Use PythonREPLTool for calculations, string manipulation, and logical deductions. "
                "Respond with the final answer only. Do not include any extra explanation. "
                "Here are some examples of how to use the tools:"
                "# Example 1: Calculate the square root of 16\n"
                "# ```python\n"
                "# print(16**0.5)\n"
                "# ```\n"
                "# Example 2: Search for the capital of France\n"
                "# ```python\n"
                "# print(DuckDuckGoSearchTool(query='capital of France'))\n"
                "# ```\n"
                "# Example 3: Reverse a string\n"
                "# ```python\n"
                "# print('hello'[::-1])\n"
                "# ```\n"
            ),
        )
        # 2) Define the tools
        self.search_tool = DuckDuckGoSearchTool()
        self.python_tool = PythonREPLTool(timeout=10)
        # 3) Create the CodeAgent.
        # There must be no trailing comma after the closing parenthesis: a
        # trailing comma binds a one-element tuple to ``self.agent`` and makes
        # every ``self.agent.run(...)`` call fail with AttributeError.
        self.agent = CodeAgent(
            model=self.model,
            tools=[self.search_tool, self.python_tool],
            # Step budget for a single run.
            max_steps=20,
        )

    def __call__(self, question: str) -> str:
        """Answer *question* with the underlying agent.

        Args:
            question: The question text handed to ``self.agent.run``.

        Returns:
            Whatever ``self.agent.run`` returns, or the string
            ``"ERROR: Agent failed to answer."`` if it raises. The exception
            and its traceback are printed rather than propagated.
        """
        try:
            return self.agent.run(question)
        except Exception as e:
            error_message = f"Agent execution failed: {e}\n{traceback.format_exc()}"
            print(error_message)  # Log the error for debugging
            return "ERROR: Agent failed to answer."  # Return a string, not an exception
def run_and_submit_all(openai_key: str):
    """Answer every question served by the scoring API and submit the results.

    Args:
        openai_key: OpenAI API key used to construct the ``GaiaAgent``.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per processed
        question (columns ``Task ID``, ``Question``, ``Answer``), or ``None``
        when the run stopped before any question was processed.
    """
    # Submissions are made under a fixed placeholder name; this function does
    # not receive any login information.
    username = "anonymous"

    # 1) Instantiate the agent
    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        print(f"Error initializing agent: {e}\n{traceback.format_exc()}")
        return f"Error initializing agent: {e}", None

    # 2) Fetch the GAIA questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        print(f"Error fetching questions: {e}\n{traceback.format_exc()}")
        return f"Error fetching questions: {e}", None
    # The loop below indexes each entry by key, so anything other than a
    # non-empty list of entries cannot be processed.
    if not isinstance(questions, list) or not questions:
        return "Error fetching questions: no questions were returned.", None

    # 3) Run the agent on each question
    answers = []
    log = []
    for item in questions:
        try:
            tid = item["task_id"]
            q = item["question"]
        except (KeyError, TypeError):
            # One malformed entry should not abort the whole run.
            print(f"Skipping malformed question entry: {item!r}")
            continue
        try:
            ans = agent(q)
        except Exception as e:
            print(f"Error processing question {tid}: {e}\n{traceback.format_exc()}")
            ans = f"ERROR: {e}"
        answers.append({"task_id": tid, "submitted_answer": ans})
        log.append({"Task ID": tid, "Question": q, "Answer": ans})
    if not answers:
        return "No answers were produced; nothing to submit.", pd.DataFrame(log)

    # 4) Submit
    submit_url = f"{DEFAULT_API_URL}/submit"
    payload = {
        "username": username,
        "agent_code": "https://huggingface.co/spaces/kshitijthakkar/GaiaAgent/tree/main",
        "answers": answers,
    }
    try:
        res = requests.post(submit_url, json=payload, timeout=60)
        res.raise_for_status()
        data = res.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {data['username']}\n"
            f"Score: {data['score']}% ({data['correct_count']}/{data['total_attempted']})\n"
            f"Message: {data.get('message', '')}"
        )
    except Exception as e:
        print(f"Submission failed: {e}\n{traceback.format_exc()}")
        status = f"Submission failed: {e}"
    return status, pd.DataFrame(log)
# --- Gradio UI ---
def run_test_questions(profile, openai_key, test_questions):
    """Run the agent on ad-hoc questions and tabulate the answers.

    Args:
        profile: Login indicator; a falsy value is treated as "not logged in".
        openai_key: OpenAI API key used to construct the ``GaiaAgent``.
        test_questions: Either a comma-separated string of questions (the
            form typed into the UI textbox) or an iterable of question
            strings. A question that itself contains a comma cannot be
            expressed in the string form. ``None`` means no questions.

    Returns:
        A ``pandas.DataFrame``. Normally it has ``Question`` and ``Answer``
        columns, one row per question. When the run cannot start it has a
        single ``Error`` column holding the message, so that the single
        table output this handler is wired to always receives a table.
    """
    if not profile:
        return pd.DataFrame(
            {"Error": ["Please log in to Hugging Face to run the test questions."]}
        )

    # A plain string must be split on commas; iterating it directly would
    # treat every character as a separate question.
    if test_questions is None:
        raw_questions = []
    elif isinstance(test_questions, str):
        raw_questions = test_questions.split(",")
    else:
        raw_questions = test_questions
    questions = [text for text in (str(q).strip() for q in raw_questions) if text]
    if not questions:
        # Nothing to ask, so there is no need to build the agent.
        return pd.DataFrame(columns=["Question", "Answer"])

    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        print(f"Error initializing agent: {e}\n{traceback.format_exc()}")
        return pd.DataFrame({"Error": [f"Error initializing agent: {e}"]})

    log = []
    for q in questions:
        try:
            ans = agent(q)
        except Exception as e:
            print(f"Error processing test question: {e}\n{traceback.format_exc()}")
            ans = f"ERROR: {e}"
        log.append({"Question": q, "Answer": ans})
    return pd.DataFrame(log)
# Build the Gradio interface: instructions, login button, API-key field,
# the "run and submit" controls, and the ad-hoc test-question controls.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Runner")
    gr.Markdown(
        "1. Clone this Space and customize your agent logic.\n"
        "2. Log in below (to get your HF username).\n"
        "3. Enter your OpenAI key (if needed).\n"
        "4. Click to run and submit to the leaderboard."
    )
    login = gr.LoginButton()
    key_in = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
    # Controls and outputs for the full benchmark run.
    run_btn = gr.Button("Run & Submit")
    out_status = gr.Textbox(label="Status", lines=4)
    out_table = gr.DataFrame(label="Questions & Answers")
    # Controls and output for running a handful of user-supplied questions.
    test_questions_input = gr.Textbox(
        label="Test Questions (comma-separated)",
        placeholder="What is the capital of France?, What is the square root of 25?",
    )
    run_test_btn = gr.Button("Run Test Questions")
    test_results_output = gr.DataFrame(label="Test Results")
    # run_and_submit_all receives only the API key and fills the status box
    # and the results table.
    run_btn.click(fn=run_and_submit_all, inputs=[key_in], outputs=[out_status, out_table])
    # run_test_questions receives (login, key, questions text) and feeds a
    # single table output.
    # NOTE(review): the LoginButton component itself is passed as the
    # `profile` argument; confirm this reflects login state rather than
    # the button's own value.
    run_test_btn.click(
        fn=run_test_questions,
        inputs=[login, key_in, test_questions_input],
        outputs=[test_results_output],
    )
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)