# Page-header residue from the hosting site ("Spaces: Sleeping"); not part of the program.
import os | |
import gradio as gr | |
import requests | |
import pandas as pd | |
from smolagents import ( | |
CodeAgent, | |
DuckDuckGoSearchTool, | |
OpenAIServerModel, | |
) | |
import traceback # Import traceback for detailed error logging | |
import subprocess | |
class PythonREPLTool:
    """Run a snippet of Python source in a separate interpreter process.

    ``name`` and ``description`` identify the tool; ``timeout`` is the number
    of seconds a snippet may run before it is abandoned.

    NOTE: the snippet is executed as-is with the privileges of this process;
    nothing here sandboxes it.
    """

    name = "python_repl"
    description = "Runs Python code and returns the output or error."

    def __init__(self, timeout=10):
        self.timeout = timeout

    def run(self, code: str) -> str:
        """Execute *code* via ``<python> -c`` and report the outcome as text.

        Args:
            code: Python source to execute.

        Returns:
            The stripped stdout when the child exits with status 0, the
            string ``"Error:"`` followed by a newline and the stripped stderr
            for any other exit status, or ``"Execution timed out."`` when the
            timeout elapses first.
        """
        # Local import keeps this class self-contained. Prefer the interpreter
        # running this app over whatever "python3" resolves to on PATH.
        import sys

        interpreter = sys.executable or "python3"
        try:
            result = subprocess.run(
                [interpreter, "-c", code],
                # Without these two flags stdout/stderr are None, and the
                # .strip() calls below would raise AttributeError.
                capture_output=True,
                text=True,
                timeout=self.timeout,
            )
        except subprocess.TimeoutExpired:
            return "Execution timed out."

        if result.returncode == 0:
            return result.stdout.strip()
        return f"Error:\n{result.stderr.strip()}"
# --- Constants ---
# Base URL of the scoring service; the "/questions" and "/submit" paths are
# appended to it when fetching tasks and posting answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Agent Definition --- | |
class GaiaAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` backed by OpenAI.

    Calling an instance with a question string runs the agent and returns its
    answer; any exception raised while running is logged and replaced by a
    fixed error string.
    """

    def __init__(self, openai_key: str):
        """Build the model, the tools and the agent.

        Args:
            openai_key: API key passed to ``OpenAIServerModel``.
        """
        self.openai_key = openai_key

        # 1) Initialize the LLM-backed model
        # NOTE(review): `system_prompt` is passed straight through to
        # OpenAIServerModel; confirm the installed smolagents version
        # accepts this keyword.
        self.model = OpenAIServerModel(
            model_id="gpt-4",  # or "gpt-3.5-turbo" if you prefer
            api_key=self.openai_key,
            system_prompt=(
                "You are a meticulous AI agent. "
                "Always think in Python code using the available tools. "
                "Never answer without executing or checking with a tool. "
                "Use DuckDuckGoSearchTool for factual lookups. "
                "Use PythonREPLTool for calculations, string manipulation, and logical deductions. "
                "Respond with the final answer only. Do not include any extra explanation. "
                "Here are some examples of how to use the tools:"
                "# Example 1: Calculate the square root of 16\n"
                "# ```python\n"
                "# print(16**0.5)\n"
                "# ```\n"
                "# Example 2: Search for the capital of France\n"
                "# ```python\n"
                "# print(DuckDuckGoSearchTool(query='capital of France'))\n"
                "# ```\n"
                "# Example 3: Reverse a string\n"
                "# ```python\n"
                "# print('hello'[::-1])\n"
                "# ```\n"
            ),
        )

        # 2) Define the tools
        self.search_tool = DuckDuckGoSearchTool()
        # NOTE(review): PythonREPLTool is a plain class defined in this file;
        # confirm CodeAgent accepts it in `tools` as-is.
        self.python_tool = PythonREPLTool(timeout=10)

        # 3) Create the CodeAgent
        # There must be no trailing comma after the closing parenthesis: with
        # one, `self.agent` becomes a 1-tuple and `self.agent.run` fails on
        # every call.
        self.agent = CodeAgent(
            model=self.model,
            tools=[self.search_tool, self.python_tool],
            # Allow the agent up to 20 reasoning/execution steps per question
            max_steps=20,
        )

    def __call__(self, question: str) -> str:
        """Run the agent on *question*.

        Returns:
            Whatever ``self.agent.run`` returns, or the string
            ``"ERROR: Agent failed to answer."`` if it raises.
        """
        try:
            return self.agent.run(question)
        except Exception as e:
            # Log the full traceback for debugging, but hand the caller a
            # plain string rather than propagating the exception.
            error_message = f"Agent execution failed: {e}\n{traceback.format_exc()}"
            print(error_message)
            return "ERROR: Agent failed to answer."
def run_and_submit_all(openai_key: str):
    """Answer every benchmark question with a ``GaiaAgent`` and submit them.

    Args:
        openai_key: API key handed to ``GaiaAgent``.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message.
        ``table`` is a DataFrame with "Task ID", "Question" and "Answer"
        columns (one row per attempted question), or ``None`` when the run
        stopped before any question was attempted.
    """
    # Login is not wired into this function, so every submission is
    # attributed to this fixed placeholder name.
    username = "anonymous"

    # 1) Instantiate the agent
    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        print(f"Error initializing agent: {e}\n{traceback.format_exc()}")
        return f"Error initializing agent: {e}", None

    # 2) Fetch the GAIA questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        print(f"Error fetching questions: {e}\n{traceback.format_exc()}")
        return f"Error fetching questions: {e}", None

    # Reject anything that is not a non-empty list before iterating over it.
    if not isinstance(questions, list) or not questions:
        return "Fetched questions list is empty or invalid format.", None

    # 3) Run the agent on each question
    answers = []
    log = []
    for item in questions:
        tid = item.get("task_id") if isinstance(item, dict) else None
        q = item.get("question") if isinstance(item, dict) else None
        if tid is None or q is None:
            # One malformed entry must not abort the whole run.
            print(f"Skipping malformed question item: {item!r}")
            continue
        try:
            ans = agent(q)
        except Exception as e:
            print(f"Error processing question {tid}: {e}\n{traceback.format_exc()}")
            ans = f"ERROR: {e}"
        answers.append({"task_id": tid, "submitted_answer": ans})
        log.append({"Task ID": tid, "Question": q, "Answer": ans})

    if not answers:
        return "No valid questions to answer; nothing was submitted.", pd.DataFrame(log)

    # 4) Submit
    submit_url = f"{DEFAULT_API_URL}/submit"
    payload = {
        "username": username,
        "agent_code": "https://huggingface.co/spaces/kshitijthakkar/GaiaAgent/tree/main",
        "answers": answers,
    }
    try:
        res = requests.post(submit_url, json=payload, timeout=60)
        res.raise_for_status()
        data = res.json()
        # Use .get so that a missing field in an otherwise successful
        # response is not reported as a failed submission.
        status = (
            f"✅ Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score', 'N/A')}% "
            f"({data.get('correct_count', '?')}/{data.get('total_attempted', '?')})\n"
            f"Message: {data.get('message', '')}"
        )
    except Exception as e:
        print(f"Submission failed: {e}\n{traceback.format_exc()}")
        status = f"Submission failed: {e}"

    return status, pd.DataFrame(log)
# --- Gradio UI --- | |
def run_test_questions(profile, openai_key, test_questions):
    """Run the agent on ad-hoc questions and tabulate the answers.

    Args:
        profile: Login indicator; a falsy value refuses the run.
        openai_key: API key handed to ``GaiaAgent``.
        test_questions: Either a single comma-separated string (the format
            the UI text box asks for) or an iterable of questions. A string
            is split on commas, so a question cannot itself contain one.

    Returns:
        A DataFrame with "Question" and "Answer" columns. This function
        feeds a single table output, so failures that happen before any
        question is run are reported as one row whose "Answer" holds the
        message, instead of a differently shaped return value.
    """
    columns = ["Question", "Answer"]

    def _message_table(message):
        # Single-row table carrying a status message.
        return pd.DataFrame([{"Question": "", "Answer": message}], columns=columns)

    if not profile:
        return _message_table("Please log in to Hugging Face to run the test questions.")

    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        print(f"Error initializing agent: {e}\n{traceback.format_exc()}")
        return _message_table(f"Error initializing agent: {e}")

    if isinstance(test_questions, str):
        # Iterating the raw string would feed the agent one character at a
        # time; split it into questions and drop blank fragments.
        questions = [part.strip() for part in test_questions.split(",") if part.strip()]
    else:
        questions = list(test_questions or [])

    log = []
    for q in questions:
        try:
            ans = agent(q)
        except Exception as e:
            print(f"Error processing test question: {e}\n{traceback.format_exc()}")
            ans = f"ERROR: {e}"
        log.append({"Question": q, "Answer": ans})
    return pd.DataFrame(log, columns=columns)
# Page definition: heading, instructions, credential inputs, then the two
# actions (full benchmark run and ad-hoc test questions) with their outputs.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Runner")
    gr.Markdown(
        "\n".join(
            [
                "1. Clone this Space and customize your agent logic.",
                "2. Log in below (to get your HF username).",
                "3. Enter your OpenAI key (if needed).",
                "4. Click to run and submit to the leaderboard.",
            ]
        )
    )

    login = gr.LoginButton()
    key_in = gr.Textbox(
        label="OpenAI API Key",
        type="password",
        placeholder="sk-...",
    )

    # Full benchmark run: status text plus the per-question table.
    run_btn = gr.Button("Run & Submit")
    out_status = gr.Textbox(label="Status", lines=4)
    out_table = gr.DataFrame(label="Questions & Answers")

    # Ad-hoc questions typed by the user.
    test_questions_input = gr.Textbox(
        label="Test Questions (comma-separated)",
        placeholder="What is the capital of France?, What is the square root of 25?",
    )
    run_test_btn = gr.Button("Run Test Questions")
    test_results_output = gr.DataFrame(label="Test Results")

    run_btn.click(
        fn=run_and_submit_all,
        inputs=[key_in],
        outputs=[out_status, out_table],
    )
    # NOTE(review): `login` is passed as an ordinary input, so `profile`
    # presumably receives the button's value rather than an OAuth profile --
    # confirm against Gradio's login/OAuth documentation.
    run_test_btn.click(
        fn=run_test_questions,
        inputs=[login, key_in, test_questions_input],
        outputs=[test_results_output],
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=False)