import os
import gradio as gr
import requests
import pandas as pd
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    OpenAIServerModel,
)
import traceback # Import traceback for detailed error logging
import subprocess
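
# Dependencies assumed to be installed in the Space (e.g. via requirements.txt):
# gradio, requests, pandas, smolagents. Only the names imported above
# (CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel) are relied on.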


class PythonREPLTool:
    """Lightweight tool that executes a Python snippet in a subprocess and returns its output."""

    name = "python_repl"
    description = "Runs Python code and returns the output or error."

    def __init__(self, timeout=10):
        self.timeout = timeout

    def run(self, code: str) -> str:
        try:
            result = subprocess.run(
                ["python3", "-c", code],
                capture_output=True,  # capture stdout/stderr so they can be returned
                text=True,            # decode output as str rather than bytes
                timeout=self.timeout,
            )
            if result.returncode == 0:
                return result.stdout.strip()
            else:
                return f"Error:\n{result.stderr.strip()}"
        except subprocess.TimeoutExpired:
            return "Execution timed out."
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


# --- Agent Definition ---
class GaiaAgent:
    def __init__(self, openai_key: str):
        self.openai_key = openai_key

        # 1) Initialize the LLM-backed model
        self.model = OpenAIServerModel(
            model_id="gpt-4",  # or "gpt-3.5-turbo" if you prefer
            api_key=self.openai_key,
            system_prompt=(
                "You are a meticulous AI agent. "
                "Always think in Python code using the available tools. "
                "Never answer without executing or checking with a tool. "
                "Use DuckDuckGoSearchTool for factual lookups. "
                "Use PythonREPLTool for calculations, string manipulation, and logical deductions. "
                "Respond with the final answer only. Do not include any extra explanation. "
                "Here are some examples of how to use the tools:\n"
                "# Example 1: Calculate the square root of 16\n"
                "# ```python\n"
                "# print(16**0.5)\n"
                "# ```\n"
                "# Example 2: Search for the capital of France\n"
                "# ```python\n"
                "# print(DuckDuckGoSearchTool(query='capital of France'))\n"
                "# ```\n"
                "# Example 3: Reverse a string\n"
                "# ```python\n"
                "# print('hello'[::-1])\n"
                "# ```\n"
            ),
        )

        # 2) Define the tools
        self.search_tool = DuckDuckGoSearchTool()
        self.python_tool = PythonREPLTool(timeout=10)

        # 3) Create the CodeAgent
        self.agent = CodeAgent(
            model=self.model,
            tools=[self.search_tool, self.python_tool],
            max_steps=20,  # allow up to 20 reasoning/tool steps per question
        )

    def __call__(self, question: str) -> str:
        try:
            return self.agent.run(question)
        except Exception as e:
            error_message = f"Agent execution failed: {e}\n{traceback.format_exc()}"
            print(error_message)  # Log the full traceback for debugging
            return "ERROR: Agent failed to answer."  # Return a string, not an exception


def run_and_submit_all(openai_key: str):
    """Run the agent on all GAIA questions and submit the answers for scoring."""
    # --- Login & Setup ---
    # if not profile:
    #     return "Please log in to Hugging Face to submit your score.", None
    # username = profile.username.strip()
    username = "anonymous"

    # 1) Instantiate our agent
    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        error_message = f"Error initializing agent: {e}\n{traceback.format_exc()}"
        print(error_message)
        return f"Error initializing agent: {e}", None

    # 2) Fetch the GAIA questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        error_message = f"Error fetching questions: {e}\n{traceback.format_exc()}"
        print(error_message)
        return f"Error fetching questions: {e}", None

    # 3) Run the agent on each question
    answers = []
    log = []
    for item in questions:
        tid = item["task_id"]
        q = item["question"]
        try:
            ans = agent(q)
        except Exception as e:
            error_message = f"Error processing question {tid}: {e}\n{traceback.format_exc()}"
            print(error_message)  # Print the full traceback
            ans = f"ERROR: {e}"
        answers.append({"task_id": tid, "submitted_answer": ans})
        log.append({"Task ID": tid, "Question": q, "Answer": ans})

    # 4) Submit the answers
    submit_url = f"{DEFAULT_API_URL}/submit"
    payload = {
        "username": username,
        "agent_code": "https://huggingface.co/spaces/kshitijthakkar/GaiaAgent/tree/main",
        "answers": answers,
    }
    try:
        res = requests.post(submit_url, json=payload, timeout=60)
        res.raise_for_status()
        data = res.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {data['username']}\n"
            f"Score: {data['score']}% ({data['correct_count']}/{data['total_attempted']})\n"
            f"Message: {data.get('message', '')}"
        )
    except Exception as e:
        error_message = f"Submission failed: {e}\n{traceback.format_exc()}"
        print(error_message)
        status = f"Submission failed: {e}"

    return status, pd.DataFrame(log)
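
# Request/response shapes as consumed by run_and_submit_all above (field names are
# taken directly from the code; nothing beyond these fields is assumed):
#   GET  {DEFAULT_API_URL}/questions -> [{"task_id": ..., "question": ...}, ...]
#   POST {DEFAULT_API_URL}/submit    -> {"username": ..., "score": ..., "correct_count": ...,
#                                        "total_attempted": ..., "message": ...}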


# --- Gradio UI ---
def run_test_questions(profile, openai_key, test_questions):
    """Run the agent on a comma-separated string of test questions (no submission)."""
    if not profile:
        # The UI expects a single DataFrame output, so report errors in that shape.
        return pd.DataFrame([{"Question": "", "Answer": "Please log in to Hugging Face to run the test questions."}])
    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        error_message = f"Error initializing agent: {e}\n{traceback.format_exc()}"
        print(error_message)
        return pd.DataFrame([{"Question": "", "Answer": f"Error initializing agent: {e}"}])

    # The textbox provides one comma-separated string, so split it into individual questions.
    questions = [q.strip() for q in test_questions.split(",") if q.strip()]

    log = []
    for q in questions:
        try:
            ans = agent(q)
        except Exception as e:
            error_message = f"Error processing test question: {e}\n{traceback.format_exc()}"
            print(error_message)
            ans = f"ERROR: {e}"
        log.append({"Question": q, "Answer": ans})
    return pd.DataFrame(log)
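
# Illustrative call (a sketch; "profile" only needs to be truthy here, the key is a
# placeholder, and the questions are hypothetical):
#   df = run_test_questions(profile=True, openai_key="sk-...",
#                           test_questions="What is 2 + 2?, What is the capital of France?")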


with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Runner")
    gr.Markdown(
        "1. Clone this Space and customize your agent logic.\n"
        "2. Log in below (to get your HF username).\n"
        "3. Enter your OpenAI key (if needed).\n"
        "4. Click to run and submit to the leaderboard."
    )

    login = gr.LoginButton()
    key_in = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
    run_btn = gr.Button("Run & Submit")
    out_status = gr.Textbox(label="Status", lines=4)
    out_table = gr.DataFrame(label="Questions & Answers")

    test_questions_input = gr.Textbox(
        label="Test Questions (comma-separated)",
        placeholder="What is the capital of France?, What is the square root of 25?",
    )
    run_test_btn = gr.Button("Run Test Questions")
    test_results_output = gr.DataFrame(label="Test Results")

    run_btn.click(fn=run_and_submit_all, inputs=[key_in], outputs=[out_status, out_table])
    run_test_btn.click(
        fn=run_test_questions,
        inputs=[login, key_in, test_questions_input],
        outputs=[test_results_output],
    )


if __name__ == "__main__":
    demo.launch(debug=True, share=False)