Spaces:

davidgturner
/

GaiaAgentEvaluator

Sleeping

App Files Files Community

GaiaAgentEvaluator / app.py

davidgturner

- changes for running agent

f5bafc2 3 months ago

raw

history blame

9.44 kB

	import os
	import gradio as gr
	import requests
	import inspect
	import pandas as pd
	import time
	import json
	import io
	import base64
	from typing import Dict, List, Union, Optional
	import re
	import sys
	from bs4 import BeautifulSoup
	from duckduckgo_search import DDGS
	import pytube
	from dateutil import parser
	try:
	from youtube_transcript_api import YouTubeTranscriptApi
	except ImportError:
	print("YouTube Transcript API not installed. Video transcription may be limited.")

	from smolagents import Tool, CodeAgent, InferenceClientModel

	# Import internal modules
	from config import (
	DEFAULT_API_URL, LLAMA_API_URL, HF_API_TOKEN, HEADERS,
	MAX_RETRIES, RETRY_DELAY
	)
	from tools.tool_manager import ToolManager

	class GaiaToolCallingAgent:
	    """Tool-calling agent specifically designed for the GAIA system.

	    The agent asks its ``ToolManager`` for the available tools, runs every
	    tool whose keyword list matches the query, and returns the collected
	    tool outputs as one text block. It exposes ``name``, ``description`` and
	    ``__call__`` so it can be handed to a smolagents manager as a managed
	    agent.
	    """

	    # Tool name -> trigger fragments. A tool is selected when any fragment
	    # occurs as a *substring* of the lower-cased query, so short fragments
	    # such as "ai" or "how" also match inside longer words.
	    _TOOL_PATTERNS = {
	        "web_search": ["current", "latest", "recent", "who", "what", "when", "where", "how"],
	        "web_content": ["content", "webpage", "website", "page"],
	        "youtube_video": ["youtube.com", "youtu.be"],
	        "wikipedia_search": ["wikipedia", "wiki", "article"],
	        "gaia_retriever": ["gaia", "agent", "ai", "artificial intelligence"],
	    }

	    def __init__(self):
	        print("GaiaToolCallingAgent initialized.")
	        self.tool_manager = ToolManager()
	        self.name = "tool_agent"  # Add required name attribute for smolagents integration
	        self.description = "A specialized agent that uses various tools to answer questions"  # Required by smolagents

	    def run(self, query: str) -> str:
	        """Process a query and return a response using available tools.

	        Args:
	            query: The question or task text; it is passed unchanged to every
	                selected tool's ``forward`` method.

	        Returns:
	            One ``"<tool name> Results:\\n<output>"`` section per tool that
	            produced a truthy result, joined by blank lines, or ``""`` when
	            no tool matched or produced output.
	        """
	        context_info = []

	        for tool in self.tool_manager.get_tools():
	            # Resolve the label up front so the error handler below cannot
	            # itself fail on a tool object that lacks a ``name``.
	            tool_name = getattr(tool, "name", repr(tool))
	            try:
	                if self._should_use_tool(tool, query):
	                    result = tool.forward(query)
	                    if result:
	                        context_info.append(f"{tool_name} Results:\n{result}")
	            except Exception as e:
	                # Best effort: one failing tool must not discard the others.
	                print(f"Error using {tool_name}: {e}")

	        # Joining an empty list yields "", the "no context" result.
	        return "\n\n".join(context_info)

	    def __call__(self, query: Optional[str] = None, *, task: Optional[str] = None, **kwargs) -> str:
	        """Make the agent callable so it can be used directly by CodeAgent.

	        Args:
	            query: The query text (positional or keyword).
	            task: Alternative keyword for the same text, used when ``query``
	                is not given. NOTE(review): smolagents managers appear to
	                invoke team members as ``agent(task="...")`` -- confirm
	                against the installed smolagents version.
	            **kwargs: Extra keyword arguments (for example
	                ``additional_args``) are accepted and ignored.

	        Returns:
	            The result of :meth:`run` for the supplied text.

	        Raises:
	            TypeError: If neither ``query`` nor ``task`` is provided.
	        """
	        prompt = query if query is not None else task
	        if prompt is None:
	            raise TypeError("GaiaToolCallingAgent requires a 'query' (or 'task') string")
	        print(f"Tool agent received query: {prompt}")
	        return self.run(prompt)

	    def _should_use_tool(self, tool: "Tool", query: str) -> bool:
	        """Determine if a specific tool should be used for the query.

	        Returns ``True`` when any trigger fragment registered for
	        ``tool.name`` occurs in the lower-cased query; tools without an
	        entry in the table are never selected.
	        """
	        query_lower = query.lower()
	        fragments = self._TOOL_PATTERNS.get(tool.name, ())
	        return any(fragment in query_lower for fragment in fragments)

	def create_manager_agent() -> CodeAgent:
	    """Build the manager ``CodeAgent`` that delegates work to the tool agent.

	    The manager is given no tools of its own; a single
	    ``GaiaToolCallingAgent`` is registered as its managed agent.

	    Returns:
	        The configured ``CodeAgent`` instance.
	    """
	    # The delegate is created first, before the model, so start-up output
	    # appears in the same order as object construction.
	    delegate = GaiaToolCallingAgent()

	    settings = {
	        "model": InferenceClientModel(),
	        "tools": [],  # No direct tools for manager
	        "managed_agents": [delegate],
	        # Modules the manager's generated code is allowed to import.
	        "additional_authorized_imports": ["json", "pandas", "numpy", "re", "requests", "bs4"],
	        "planning_interval": 3,
	        "verbosity_level": 2,
	        "max_steps": 10,
	    }
	    return CodeAgent(**settings)

	def create_agent():
	    """Create the GAIA agent system.

	    Returns:
	        The manager agent built by ``create_manager_agent``, or ``None``
	        when construction raises; the error is printed, not propagated.
	    """
	    try:
	        print("Initializing GAIA agent system...")
	        agent = create_manager_agent()
	    except Exception as err:
	        print(f"Error creating GAIA agent: {err}")
	        agent = None
	    return agent

	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """
	    Fetches all questions, runs the GAIA agent on them, submits all answers,
	    and displays the results.

	    Args:
	        profile: The logged-in user's OAuth profile, or ``None`` when nobody
	            is logged in.

	    Returns:
	        A ``(status_message, results)`` pair. ``results`` is a
	        ``pandas.DataFrame`` of per-question outcomes once the agent has
	        been run, and ``None`` when the run stops earlier (no login, agent
	        initialisation failure, or question fetch failure).
	    """
	    # --- Determine HF Space Runtime URL and Repo URL ---
	    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

	    if not profile:
	        print("User not logged in.")
	        return "Please Login to Hugging Face with the button.", None
	    username = f"{profile.username}"
	    print(f"User logged in: {username}")

	    api_url = DEFAULT_API_URL
	    questions_url = f"{api_url}/questions"
	    submit_url = f"{api_url}/submit"

	    # 1. Initialize Agent
	    try:
	        print("Initializing GAIA agent system...")
	        agent = create_agent()
	        if not agent:
	            return "Error: Could not initialize agent.", None
	        print("GAIA agent initialization complete.")
	    except Exception as e:
	        print(f"Error initializing agent: {e}")
	        return f"Error initializing agent: {e}", None

	    # 2. Fetch Questions
	    questions_data, fetch_error = _fetch_questions(questions_url)
	    if fetch_error is not None:
	        return fetch_error, None

	    # 3. Run Agent on Questions
	    answers_payload, results_log = _run_agent_on_questions(agent, questions_data)
	    if not answers_payload:
	        print("Agent did not produce any answers to submit.")
	        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

	    # 4. Prepare Submission
	    # When SPACE_ID is unset, os.getenv returns None and the link below
	    # contains the literal text "None".
	    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
	    submission_data = {
	        "username": username.strip(),
	        "agent_code": agent_code,
	        "answers": answers_payload,
	    }

	    # 5. Submit
	    status_message = _submit_answers(submit_url, submission_data)
	    return status_message, pd.DataFrame(results_log)


	def _fetch_questions(questions_url: str):
	    """Download the question list; return ``(questions, error_message)``.

	    Exactly one element of the pair is ``None``: ``error_message`` on
	    success, ``questions`` on failure.
	    """
	    print(f"Fetching questions from: {questions_url}")
	    try:
	        response = requests.get(questions_url, timeout=15)
	        response.raise_for_status()
	        questions_data = response.json()
	    except requests.exceptions.RequestException as e:
	        print(f"Error fetching questions: {e}")
	        return None, f"Error fetching questions: {e}"
	    except Exception as e:
	        print(f"An unexpected error occurred fetching questions: {e}")
	        return None, f"An unexpected error occurred fetching questions: {e}"

	    # A non-list payload (for example an error object) cannot be iterated
	    # as questions, so it is reported the same way as an empty list.
	    if not questions_data or not isinstance(questions_data, list):
	        print("Fetched questions list is empty.")
	        return None, "Fetched questions list is empty or invalid format."

	    print(f"Fetched {len(questions_data)} questions.")
	    return questions_data, None


	def _run_agent_on_questions(agent, questions_data):
	    """Run ``agent`` on every question; return ``(answers_payload, results_log)``.

	    ``answers_payload`` holds one entry per successfully answered task;
	    ``results_log`` additionally records tasks on which the agent raised.
	    """
	    results_log = []
	    answers_payload = []
	    print(f"Running agent on {len(questions_data)} questions...")
	    for item in questions_data:
	        # Malformed entries are skipped so they cannot abort the whole run.
	        if not isinstance(item, dict):
	            print(f"Skipping item with missing task_id or question: {item}")
	            continue
	        task_id = item.get("task_id")
	        question_text = item.get("question")
	        if not task_id or question_text is None:
	            print(f"Skipping item with missing task_id or question: {item}")
	            continue
	        try:
	            # Run the agent and get the response
	            response = agent.run(f"Answer this question concisely: {question_text}")

	            # Clean up the response if needed
	            if isinstance(response, dict):
	                submitted_answer = response.get("answer", str(response))
	            else:
	                submitted_answer = str(response)

	            # Add to submission payload
	            answers_payload.append({
	                "task_id": task_id,
	                "submitted_answer": submitted_answer,
	            })

	            # Log the result
	            results_log.append({
	                "Task ID": task_id,
	                "Question": question_text,
	                "Submitted Answer": submitted_answer,
	            })
	        except Exception as e:
	            # Failed tasks are logged for display but not submitted.
	            print(f"Error running agent on task {task_id}: {e}")
	            results_log.append({
	                "Task ID": task_id,
	                "Question": question_text,
	                "Submitted Answer": f"AGENT ERROR: {e}",
	            })
	    return answers_payload, results_log


	def _submit_answers(submit_url: str, submission_data: dict) -> str:
	    """POST the answers to the scoring API and return a status message."""
	    print(f"Submitting {len(submission_data['answers'])} answers to API...")
	    try:
	        response = requests.post(submit_url, json=submission_data, timeout=60)
	        response.raise_for_status()
	        result_data = response.json()

	        status_message = (
	            f"Submission Successful!\n"
	            f"User: {result_data.get('username')}\n"
	            f"Overall Score: {result_data.get('score', 'N/A')}% "
	            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
	            f"Message: {result_data.get('message', 'No message received.')}"
	        )
	        print("Submission successful.")
	        return status_message
	    except Exception as e:
	        status_message = f"Submission Failed: {str(e)}"
	        print(f"Error during submission: {e}")
	        return status_message

	# --- Build Gradio Interface using Blocks ---
	# Layout: title, instructions, login button, run button, then the status box
	# and results table that receive the two values returned by
	# run_and_submit_all.
	with gr.Blocks() as demo:
	    gr.Markdown("# GAIA Agent Evaluation Runner")
	    gr.Markdown(
	        """
	Instructions:
	1. Log in to your Hugging Face account using the button below.
	2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and see the score.

	The agent uses a managed tool-calling architecture and the smolagents framework for reliable answers.
	"""
	    )

	    gr.LoginButton()
	    run_button = gr.Button("Run Evaluation & Submit All Answers")
	    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
	    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

	    # No `inputs` are wired here. NOTE(review): the `profile` argument is
	    # presumably injected by Gradio from the handler's OAuthProfile type
	    # hint -- confirm against the Gradio OAuth documentation.
	    run_button.click(
	        fn=run_and_submit_all,
	        outputs=[status_output, results_table],
	    )

	if __name__ == "__main__":
	    # Banner: 30 dashes on each side of the title.
	    print("\n" + "-" * 30 + " GAIA Agent Starting " + "-" * 30)
	    demo.launch(debug=True, share=False)