GaiaAgent

Sleeping

App Files Files Community

GaiaAgent / app.py

kshitijthakkar

Update app.py

c6929d8 verified about 2 months ago

raw

history blame contribute delete

7.45 kB

	import os
	import gradio as gr
	import requests
	import pandas as pd
	from smolagents import (
	CodeAgent,
	DuckDuckGoSearchTool,
	OpenAIServerModel,
	)
	import traceback # Import traceback for detailed error logging

	import subprocess

	class PythonREPLTool:
	    """Run a Python snippet in a separate interpreter process.

	    Attributes:
	        name: Identifier under which the tool is exposed.
	        description: Human-readable summary of what the tool does.
	        timeout: Maximum number of seconds a snippet may run.
	    """

	    # NOTE(review): this is a plain class, not a subclass of a smolagents
	    # tool base class -- confirm that CodeAgent accepts it in ``tools``.
	    name = "python_repl"
	    description = "Runs Python code and returns the output or error."

	    def __init__(self, timeout=10):
	        """Remember the per-run time limit, in seconds."""
	        self.timeout = timeout

	    def run(self, code: str) -> str:
	        """Execute ``code`` via ``python -c`` and report what it printed.

	        Args:
	            code: Python source text to execute.

	        Returns:
	            The stripped stdout when the process exits with status 0, the
	            text ``Error:`` followed by the stripped stderr on any other
	            exit status, or ``"Execution timed out."`` when the snippet
	            exceeds ``self.timeout`` seconds.
	        """
	        import sys  # local import: only this method needs it

	        # Prefer the interpreter running this app so the snippet sees the
	        # same installation; fall back to whatever "python3" is on PATH.
	        interpreter = sys.executable or "python3"

	        # NOTE(review): the snippet runs unsandboxed and inherits this
	        # process's environment -- consider isolating it.
	        try:
	            result = subprocess.run(
	                [interpreter, "-c", code],
	                # Without capture_output/text, stdout and stderr are None
	                # and the .strip() calls below would raise AttributeError.
	                capture_output=True,
	                text=True,
	                timeout=self.timeout,
	            )
	        except subprocess.TimeoutExpired:
	            return "Execution timed out."

	        if result.returncode == 0:
	            return result.stdout.strip()
	        return f"Error:\n{result.stderr.strip()}"

	# --- Constants ---
	# Base URL of the scoring service; the "/questions" and "/submit" endpoint
	# URLs used by run_and_submit_all are built by appending to this value.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


	# --- Agent Definition ---
	class GaiaAgent:
	    """Callable wrapper around a smolagents ``CodeAgent`` for GAIA questions.

	    Calling an instance with a question string runs the underlying agent
	    and returns its answer as a string; failures are logged and turned
	    into a fixed error string instead of propagating.
	    """

	    def __init__(self, openai_key: str):
	        """Build the model, the tools, and the agent.

	        Args:
	            openai_key: API key passed to ``OpenAIServerModel``.
	        """
	        self.openai_key = openai_key
	        # 1) Initialize the LLM-backed model
	        # NOTE(review): confirm that OpenAIServerModel accepts a
	        # ``system_prompt`` keyword; if it forwards unknown keywords to the
	        # completion request this argument may be rejected at call time.
	        self.model = OpenAIServerModel(
	            model_id="gpt-4",  # or "gpt-3.5-turbo" if you prefer
	            api_key=self.openai_key,
	            system_prompt=(
	                "You are a meticulous AI agent. "
	                "Always think in Python code using the available tools. "
	                "Never answer without executing or checking with a tool. "
	                "Use DuckDuckGoSearchTool for factual lookups. "
	                "Use PythonREPLTool for calculations, string manipulation, and logical deductions. "
	                "Respond with the final answer only. Do not include any extra explanation. "
	                # The newline keeps the heading from running into Example 1.
	                "Here are some examples of how to use the tools:\n"
	                "# Example 1: Calculate the square root of 16\n"
	                "# ```python\n"
	                "# print(16**0.5)\n"
	                "# ```\n"
	                "# Example 2: Search for the capital of France\n"
	                "# ```python\n"
	                "# print(DuckDuckGoSearchTool(query='capital of France'))\n"
	                "# ```\n"
	                "# Example 3: Reverse a string\n"
	                "# ```python\n"
	                "# print('hello'[::-1])\n"
	                "# ```\n"
	            ),
	        )
	        # 2) Define the tools
	        self.search_tool = DuckDuckGoSearchTool()
	        self.python_tool = PythonREPLTool(timeout=10)
	        # 3) Create the CodeAgent
	        # No trailing comma after the call: a stray one turns ``self.agent``
	        # into a 1-tuple and breaks ``self.agent.run`` in ``__call__``.
	        self.agent = CodeAgent(
	            model=self.model,
	            tools=[self.search_tool, self.python_tool],
	            max_steps=20,
	        )

	    def __call__(self, question: str) -> str:
	        """Answer ``question`` with the agent.

	        Args:
	            question: The question text handed to ``self.agent.run``.

	        Returns:
	            The agent's result converted to ``str``, or the literal
	            ``"ERROR: Agent failed to answer."`` if the run raised.
	        """
	        try:
	            # Coerce to str so callers always get the annotated type even
	            # when the agent produces a non-string final answer.
	            return str(self.agent.run(question))
	        except Exception as e:
	            error_message = f"Agent execution failed: {e}\n{traceback.format_exc()}"
	            print(error_message)  # Log the error for debugging
	            return "ERROR: Agent failed to answer."  # Return a string, not an exception


	def run_and_submit_all(openai_key: str):
	    """Fetch the GAIA questions, answer each one, and submit the answers.

	    Args:
	        openai_key: OpenAI API key handed to ``GaiaAgent``.

	    Returns:
	        A ``(status, table)`` pair. ``status`` is a human-readable message.
	        ``table`` is a DataFrame with "Task ID", "Question" and "Answer"
	        columns, or ``None`` when the run stops before any question is
	        processed (agent setup failed, fetch failed, or no questions).
	    """
	    # Login is not wired into this handler, so submissions are attributed
	    # to a placeholder user name.
	    username = "anonymous"

	    # 1) Instantiate the agent
	    try:
	        agent = GaiaAgent(openai_key)
	    except Exception as e:
	        print(f"Error initializing agent: {e}\n{traceback.format_exc()}")
	        return f"Error initializing agent: {e}", None

	    # 2) Fetch the GAIA questions
	    questions_url = f"{DEFAULT_API_URL}/questions"
	    try:
	        resp = requests.get(questions_url, timeout=15)
	        resp.raise_for_status()
	        questions = resp.json()
	    except Exception as e:
	        print(f"Error fetching questions: {e}\n{traceback.format_exc()}")
	        return f"Error fetching questions: {e}", None

	    if not questions:
	        # Nothing to answer; avoid submitting an empty payload.
	        return "Fetched questions list is empty.", None

	    # 3) Run the agent on each question
	    answers = []
	    log = []
	    for item in questions:
	        # Skip entries lacking the fields we need rather than aborting the
	        # whole run with a KeyError/TypeError.
	        if not isinstance(item, dict) or "task_id" not in item or "question" not in item:
	            print(f"Skipping malformed question entry: {item!r}")
	            continue
	        tid = item["task_id"]
	        q = item["question"]
	        try:
	            ans = agent(q)
	        except Exception as e:
	            print(f"Error processing question {tid}: {e}\n{traceback.format_exc()}")
	            ans = f"ERROR: {e}"
	        answers.append({"task_id": tid, "submitted_answer": ans})
	        log.append({"Task ID": tid, "Question": q, "Answer": ans})

	    if not answers:
	        return "No valid questions were found; nothing was submitted.", pd.DataFrame(log)

	    # 4) Submit
	    submit_url = f"{DEFAULT_API_URL}/submit"
	    payload = {
	        "username": username,
	        "agent_code": "https://huggingface.co/spaces/kshitijthakkar/GaiaAgent/tree/main",
	        "answers": answers,
	    }
	    try:
	        res = requests.post(submit_url, json=payload, timeout=60)
	        res.raise_for_status()
	        data = res.json()
	    except Exception as e:
	        print(f"Submission failed: {e}\n{traceback.format_exc()}")
	        return f"Submission failed: {e}", pd.DataFrame(log)

	    # Use .get so a response missing a field is still reported as a
	    # successful submission instead of being mislabeled as a failure.
	    status = (
	        f"✅ Submission Successful!\n"
	        f"User: {data.get('username', 'N/A')}\n"
	        f"Score: {data.get('score', 'N/A')}% "
	        f"({data.get('correct_count', 'N/A')}/{data.get('total_attempted', 'N/A')})\n"
	        f"Message: {data.get('message', '')}"
	    )
	    return status, pd.DataFrame(log)



	# --- Gradio UI ---
	def run_test_questions(profile, openai_key, test_questions):
	    """Run the agent on ad-hoc questions and tabulate the answers.

	    Args:
	        profile: Login indicator; any falsy value is treated as
	            "not logged in".
	        openai_key: OpenAI API key handed to ``GaiaAgent``.
	        test_questions: Either one comma-separated string (the form a
	            text box delivers) or an iterable of question strings.

	    Returns:
	        A DataFrame with "Question" and "Answer" columns. Problems that
	        occur before any question runs are reported as a single row whose
	        "Answer" holds the message, so the caller always gets exactly one
	        DataFrame regardless of outcome.
	    """

	    def _message_frame(message):
	        # One-row table carrying a status message in the "Answer" column.
	        return pd.DataFrame([{"Question": "", "Answer": message}])

	    if not profile:
	        return _message_frame("Please log in to Hugging Face to run the test questions.")

	    try:
	        agent = GaiaAgent(openai_key)
	    except Exception as e:
	        print(f"Error initializing agent: {e}\n{traceback.format_exc()}")
	        return _message_frame(f"Error initializing agent: {e}")

	    # A plain string must be split explicitly; iterating it directly would
	    # feed the agent one character at a time. Questions that themselves
	    # contain commas cannot be expressed in the comma-separated form.
	    if isinstance(test_questions, str):
	        candidates = test_questions.split(",")
	    else:
	        candidates = test_questions or []
	    questions = [q.strip() for q in candidates if isinstance(q, str) and q.strip()]

	    log = []
	    for q in questions:
	        try:
	            ans = agent(q)
	        except Exception as e:
	            print(f"Error processing test question: {e}\n{traceback.format_exc()}")
	            ans = f"ERROR: {e}"
	        log.append({"Question": q, "Answer": ans})
	    return pd.DataFrame(log, columns=["Question", "Answer"])



	# Build the Gradio interface: instructions, login button, API-key box, a
	# "Run & Submit" section and a separate "Run Test Questions" section.
	with gr.Blocks() as demo: # Corrected to use gr.Blocks()
	gr.Markdown("# GAIA Benchmark Runner")
	gr.Markdown(
	"1. Clone this Space and customize your agent logic.\n"
	"2. Log in below (to get your HF username).\n"
	"3. Enter your OpenAI key (if needed).\n"
	"4. Click to run and submit to the leaderboard."
	)
	login = gr.LoginButton()
	# The key is entered in a password-type box.
	key_in = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
	run_btn = gr.Button("Run & Submit")
	out_status = gr.Textbox(label="Status", lines=4)
	out_table = gr.DataFrame(label="Questions & Answers")

	# Free-form test questions are typed as one comma-separated string.
	test_questions_input = gr.Textbox(
	label="Test Questions (comma-separated)",
	placeholder="What is the capital of France?, What is the square root of 25?",
	)
	run_test_btn = gr.Button("Run Test Questions")
	test_results_output = gr.DataFrame(label="Test Results")

	# "Run & Submit" passes only the API key; the handler's two return values
	# fill the status box and the results table.
	run_btn.click(fn=run_and_submit_all, inputs=[key_in], outputs=[out_status, out_table])
	# NOTE(review): `login` (the LoginButton component) is passed as the first
	# input, so its value arrives as `profile` in run_test_questions -- confirm
	# that this value actually reflects the user's login state.
	run_test_btn.click(
	fn=run_test_questions,
	inputs=[login, key_in, test_questions_input],
	outputs=[test_results_output],
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch(debug=True, share=False)