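# NOTE: residue from the Hugging Face Spaces page this file was copied from
# (status banner at capture time: "Runtime error"); not part of the program.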
import inspect
import os

import gradio as gr
import pandas as pd
import requests
from smolagents import CodeAgent, Tool, tool
from smolagents import DuckDuckGoSearchTool, VisitWebpageTool
from smolagents import OpenAIServerModel
from smolagents import PythonInterpreterTool  # Import the built-in Python Interpreter Tool
# --- Constants ---
# Base URL of the scoring service. The rest of this file appends
# "/questions", "/submit" and "/files/<task_id>" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Tool Definitions ---
class GaiaFileTool(Tool):
    """
    A smolagents.Tool subclass for downloading files from the GAIA API.

    The tool issues a GET request to ``<api_base_url>/files/<task_id>`` and
    returns the response body decoded as text. Network and HTTP failures are
    not raised; they are reported back to the agent as an error string.
    """

    name = "download_gaia_file"
    description = (
        "Downloads a file associated with a given GAIA task ID and returns its content. "
        "It takes 'task_id' as input and returns the file content as a string. "
        "Use this when a question refers to an external file."
    )
    inputs = {
        "task_id": {
            "type": "string",
            "description": "The task ID for which to download the file (e.g., '2345').",
        }
    }
    output_type = "string"

    def __init__(self, api_base_url=DEFAULT_API_URL, timeout=30):
        """
        Args:
            api_base_url: Base URL of the GAIA scoring API (a trailing slash is removed).
            timeout: Seconds to wait for the file download before giving up.
        """
        super().__init__()
        self.api_base_url = api_base_url.rstrip("/")
        self.timeout = timeout
        print(f"GaiaFileTool initialized with API base URL: {self.api_base_url}")

    def forward(self, task_id: str) -> str:
        """
        The core logic for the tool: downloads a file from the GAIA API.
        This method is called by the agent when it uses this tool.

        Args:
            task_id: Identifier of the task whose file should be fetched.

        Returns:
            The response body as text, or a string starting with
            "Error downloading file:" if the request failed.
        """
        file_url = f"{self.api_base_url}/files/{task_id}"
        print(f"Attempting to download file from: {file_url}")
        try:
            # A timeout keeps one stalled download from blocking the whole run.
            response = requests.get(file_url, timeout=self.timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Error downloading file for task_id {task_id}: {e}")
            return f"Error downloading file: {e}"
        print(f"Successfully downloaded file for task_id {task_id}")
        # NOTE(review): the body is always decoded as text; binary attachments
        # (images, audio, spreadsheets) will not be usable in this form.
        return response.text
# Removed the custom python_repl function as we are using the built-in tool
# --- Custom GAIA Agent Definition ---
class GaiaAgent(CodeAgent):
    """
    A smolagents-based agent designed to tackle GAIA Level 1 benchmark questions.
    It uses Gemini Flash for reasoning and integrates a Python Interpreter, a
    GAIA file download tool, and web browsing/searching tools.
    """

    def __init__(self):
        """Build the Gemini-backed model and register the agent's tools."""
        print("GaiaAgent initializing...")
        gemini_api_key = os.getenv("GEMINI_API_KEY")
        if not gemini_api_key:
            print("WARNING: GEMINI_API_KEY environment variable not set.")
            print("Please set GEMINI_API_KEY for Gemini Flash to work.")

        # Gemini is reached through its OpenAI-compatible endpoint.
        self.llm_model = OpenAIServerModel(
            model_id="gemini-2.0-flash",
            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=gemini_api_key,
            temperature=0.1,
        )

        python_interpreter_tool = PythonInterpreterTool()
        gaia_file_tool_instance = GaiaFileTool()
        duckduckgo_search_tool = DuckDuckGoSearchTool()
        visit_webpage_tool = VisitWebpageTool()

        agent_tools = [
            python_interpreter_tool,
            gaia_file_tool_instance,
            duckduckgo_search_tool,
            visit_webpage_tool,
        ]
        super().__init__(model=self.llm_model, tools=agent_tools)

        # Record the names the tools are registered under so the prompt refers
        # to callables that actually exist, rather than hard-coded guesses.
        self.gaia_tool_names = {
            "python": python_interpreter_tool.name,
            "file": gaia_file_tool_instance.name,
            "search": duckduckgo_search_tool.name,
            "visit": visit_webpage_tool.name,
        }
        print("GaiaAgent initialized successfully with Gemini Flash and built-in tools.")

    def __call__(self, question: str) -> str:
        """
        The main method for the agent to process a question and return an answer.
        This will involve the agent's internal reasoning, tool use, and planning.

        Args:
            question: The question text to solve.

        Returns:
            The cleaned answer string, or a fixed error message if the run
            raised an exception.
        """
        print(f"\n--- Agent received question (first 100 chars): {question[:100]}...")
        try:
            names = self.gaia_tool_names
            prompt = (
                "You are an AI agent designed to solve GAIA benchmark questions. "
                "Your goal is to provide the exact answer as a string, without any additional text, "
                "explanation, or the phrase 'FINAL ANSWER:'. "
                f"Break down the problem, use the available tools ({', '.join(names.values())}) "
                "as needed, and think step-by-step. "
                f"Use '{names['python']}' for any calculations or code execution. "
                f"Use '{names['file']}' to fetch a file attached to a task, given its task ID. "
                f"Use '{names['search']}' to find information on the web. "
                f"Use '{names['visit']}' to read the content of a specific URL. "
                "When you have the final answer, output ONLY the answer string.\n\n"
                f"Question: {question}"
            )
            result = self.run(prompt)
            final_answer = self._extract_exact_answer(result)
            print(f"--- Agent returning final answer (first 100 chars): {final_answer[:100]}...")
            return final_answer
        except Exception as e:
            # Boundary handler: one failing question must not abort the whole batch.
            print(f"--- Error during agent execution: {e}")
            return "Agent encountered an error and could not provide an answer."

    def _extract_exact_answer(self, raw_output) -> str:
        """
        Extracts and formats the exact answer from the agent's raw output.
        Ensures no "FINAL ANSWER" text is included and handles any
        extraneous formatting. This function is crucial for GAIA's exact match scoring.

        Args:
            raw_output: Whatever the agent run returned. Non-string values
                (numbers, lists, ...) are converted with ``str``; ``None``
                yields an empty string.

        Returns:
            The last meaningful line of the output with answer prefixes, code
            fences and one layer of surrounding quotes removed.
        """
        if raw_output is None:
            return ""
        cleaned_output = str(raw_output)
        for marker in ("FINAL ANSWER:", "Answer:", "The answer is:", "```python", "```"):
            cleaned_output = cleaned_output.replace(marker, "").strip()

        lines = cleaned_output.split("\n")
        potential_answer = lines[-1].strip()
        # Fall back to an earlier line only when the last one is unusable.
        # Short answers such as "42" are legitimate and must be kept.
        if not potential_answer or "tool_code" in potential_answer.lower():
            for line in reversed(lines[:-1]):
                if line.strip() and "tool_code" not in line.lower():
                    potential_answer = line.strip()
                    break
        cleaned_output = potential_answer

        # Strip one layer of matching quotes per quote style; the length guard
        # keeps a lone quote character from being treated as a quoted pair.
        for quote in ('"', "'"):
            if (
                len(cleaned_output) >= 2
                and cleaned_output.startswith(quote)
                and cleaned_output.endswith(quote)
            ):
                cleaned_output = cleaned_output[1:-1]
        return cleaned_output.strip()
# --- Gradio Application Logic (mostly unchanged from template) ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GaiaAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile of the logged-in user, or None when the user
            is not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per processed question, or None when the run stopped before
        any question was processed.
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = GaiaAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    try:
        print(f"Fetching questions from: {questions_url}")
        questions_response = requests.get(questions_url, timeout=15)
        questions_response.raise_for_status()
        questions = questions_response.json()
    except requests.exceptions.RequestException as e:
        return f"Error fetching questions: {e}", None
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body.
        return f"Error decoding questions response: {e}", None

    if not isinstance(questions, list) or not questions:
        return "Fetched questions list is empty or invalid format.", None
    print(f"Fetched {len(questions)} questions.")

    all_answers = []
    results_data = []
    for i, q_data in enumerate(questions):
        if not isinstance(q_data, dict):
            print(f"Skipping malformed question entry at index {i}: {q_data!r}")
            continue
        task_id = q_data.get("task_id", f"unknown_{i}")
        question_text = q_data.get("question", "No question text found.")
        print(f"\n--- Processing Task ID: {task_id} ---")
        print(f"Question: {question_text[:100]}...")

        # The file tool is keyed by task_id, so the agent has to be told the
        # id whenever the task comes with an attachment.
        agent_input = question_text
        file_name = q_data.get("file_name")
        if file_name:
            agent_input += (
                f"\n\nThis task has an attached file named '{file_name}'. "
                f"Its task_id is '{task_id}'; pass that task_id to the "
                "download_gaia_file tool to fetch the file."
            )

        agent_answer = agent(agent_input)
        # Field names follow the scoring API's submission schema.
        all_answers.append({"task_id": task_id, "submitted_answer": agent_answer})
        results_data.append({
            "Task ID": task_id,
            "Question": question_text,
            "Agent Answer": agent_answer,
        })
        print(f"--- Finished processing Task ID: {task_id} ---")

    results_df = pd.DataFrame(results_data)
    if not all_answers:
        return "Agent did not produce any answers to submit.", results_df

    try:
        print(f"\nSubmitting {len(all_answers)} answers to: {submit_url}")
        submission_payload = {
            "username": username,
            "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_execution",
            "answers": all_answers,
        }
        submit_response = requests.post(submit_url, json=submission_payload, timeout=60)
        submit_response.raise_for_status()
        submission_result = submit_response.json()
        print(f"Submission successful: {submission_result}")
        status_message = f"Submission successful!\nScore: {submission_result.get('score', 'N/A')}\nDetails: {submission_result.get('message', 'No message')}"
    except requests.exceptions.RequestException as e:
        print(f"Error submitting answers: {e}")
        status_message = f"Error submitting answers: {e}"
    except ValueError as e:
        print(f"Error decoding submission response: {e}")
        status_message = f"Error decoding submission response: {e}"

    return status_message, results_df
# --- Gradio UI ---
# Page layout: instructions, login button, a run button, and two outputs
# (a status textbox and a table of per-question answers).
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # GAIA Level 1 Agent Evaluation
        This application allows you to run your `smolagents`-based agent on the GAIA Level 1 benchmark
        and submit your answers to the leaderboard.
        **Important:**
        1. **Login to Hugging Face** using the button below to submit your score.
        2. **Set `GEMINI_API_KEY`**: Ensure your `GEMINI_API_KEY` is set as a Space Secret
        in Hugging Face Spaces (or as an environment variable if running locally)
        for the Gemini Flash model to function.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # No explicit inputs are wired; run_and_submit_all takes only the
    # gr.OAuthProfile-annotated `profile` parameter.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
if __name__ == "__main__":
    # Startup banner: report the Space-related environment variables, then
    # launch the Gradio app.
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # Append the ".hf.space" suffix only when the value does not already
        # carry it, so the printed URL never ends in ".hf.space.hf.space".
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found. Code link might be incorrect for submission.")

    demo.launch()