Spaces:
Runtime error
Runtime error
File size: 8,359 Bytes
6573e0d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# agent.py
import contextlib
import io
import logging
import os
logger = logging.getLogger(__name__)
from models import GoogleModelID # Import GoogleModelID
from settings import Settings
from smolagents import OpenAIServerModel, CodeAgent, FinalAnswerTool # Changed from LiteLLMModel
from smolagents import DuckDuckGoSearchTool, VisitWebpageTool # Changed from GoogleSearchTool
from smolagents.local_python_executor import BASE_PYTHON_TOOLS
from tools import GetTaskFileTool, VideoUnderstandingTool, AudioUnderstandingTool
from tools import ChessBoardFENTool, BestChessMoveTool, ConvertChessMoveTool, ExcelParsingTool
import json # Added for BASE_PYTHON_TOOLS
import pandas as pd # Added for BASE_PYTHON_TOOLS
# Register extra builtins and modules in BASE_PYTHON_TOOLS so that the
# PythonInterpreterTool has access to them.
# NOTE: `exec` (and, similarly, `open` / `os`) is powerful; use with caution
# in production.
BASE_PYTHON_TOOLS.update(
    {
        "open": open,
        "os": os,
        "io": io,
        "contextlib": contextlib,
        "exec": exec,
        "json": json,  # For parsing JSON if needed by agent
        "pd": pd,  # For pandas operations if needed by agent
    }
)
class ResearchAgent:
    """Holder for the "researcher" ``CodeAgent``.

    After construction the configured agent is available as ``self.agent``.
    It is given a web search tool, a webpage visiting tool, and the project's
    video and audio understanding tools.
    """

    def __init__(self, settings: Settings):
        """Assemble the researcher agent's tools and model, then build it.

        Args:
            settings: Project settings object. It is forwarded to the video
                and audio tools, and ``settings.gemini_api_key`` provides the
                API key for the model.
        """
        # Tools are instantiated first, in the same order they are listed.
        research_tools = [
            DuckDuckGoSearchTool(),
            VisitWebpageTool(max_output_length=100000),
            # The multimodal tools are pinned to the 2.0 Flash model id.
            VideoUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH),
            AudioUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH),
        ]

        # Modules the agent's generated code is allowed to import.
        allowed_imports = [
            "unicodedata", "stat", "datetime", "random", "pandas", "itertools",
            "math", "statistics", "queue", "time", "collections", "re", "os",
            "json", "io", "urllib.parse",
        ]

        # Model reached through the OpenAI-compatible Gemini endpoint.
        llm = OpenAIServerModel(
            model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW,
            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=settings.gemini_api_key.get_secret_value(),
            temperature=0.1,
            timeout=180,
        )

        self.agent = CodeAgent(
            name="researcher",
            description=(
                "A specialized agent for web research, video analysis, and audio understanding. "
                "Give it your query as an argument. "
                "Use 'duckduckgo_search_tool' for web searches, "
                "'visit_webpage_tool' to read web page content, "
                "'video_understanding_tool' for YouTube videos, "
                "and 'audio_understanding_tool' for local audio files."
            ),
            add_base_tools=False,
            tools=research_tools,
            additional_authorized_imports=allowed_imports,
            max_steps=15,
            verbosity_level=2,
            model=llm,
        )
        logger.info("ResearchAgent initialized.")
class ChessAgent:
    """Holder for the "chess_player" ``CodeAgent``.

    After construction the configured agent is available as ``self.agent``.
    It is given the project's board-to-FEN, best-move, and move-conversion
    tools.
    """

    def __init__(self, settings: Settings):
        """Assemble the chess agent's tools and model, then build it.

        Args:
            settings: Project settings object. It is forwarded to the
                best-move and move-conversion tools, and
                ``settings.gemini_api_key`` provides the API key for the model.
        """
        # Tools are instantiated first, in the same order they are listed.
        chess_tools = [
            ChessBoardFENTool(),
            BestChessMoveTool(settings),
            ConvertChessMoveTool(settings, GoogleModelID.GEMINI_2_5_FLASH_PREVIEW),
        ]

        # Modules the agent's generated code is allowed to import.
        allowed_imports = [
            "unicodedata", "stat", "datetime", "random", "pandas", "itertools",
            "math", "statistics", "queue", "time", "collections", "re", "os",
            "json", "urllib.parse",
        ]

        # Model reached through the OpenAI-compatible Gemini endpoint.
        llm = OpenAIServerModel(
            model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW,
            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=settings.gemini_api_key.get_secret_value(),
            temperature=0.0,
            timeout=180,
        )

        self.agent = CodeAgent(
            name="chess_player",
            description="Makes a chess move. Give it a query including board image filepath and player turn (black or white).",
            add_base_tools=False,
            tools=chess_tools,
            additional_authorized_imports=allowed_imports,
            max_steps=10,
            verbosity_level=2,
            model=llm,
        )
        logger.info("ChessAgent initialized.")
class ManagerAgent:
    """
    The main orchestrating agent that routes questions to specialized sub-agents
    or handles them directly with its own tools.

    Attributes are set in ``__init__``: ``settings`` (the settings object),
    ``researcher`` and ``chess_player`` (the managed sub-agents), and
    ``agent`` (the manager ``CodeAgent`` itself).
    """

    def __init__(self, settings: Settings):
        """Build both sub-agents and the manager agent that supervises them.

        Args:
            settings: Project settings object. It is forwarded to the
                sub-agents and to the manager's own tools, and
                ``settings.gemini_api_key`` provides the API key for the model.
        """
        self.settings = settings
        self.researcher = ResearchAgent(settings).agent
        self.chess_player = ChessAgent(settings).agent
        # Main manager agent
        self.agent = CodeAgent(
            name="manager",
            description=(
                "You are a highly capable AI assistant designed to solve complex GAIA benchmark questions. "
                "Your primary role is to route tasks to the most appropriate specialized agent: "
                "'researcher' for general knowledge, web browsing, video, and audio understanding tasks, "
                "or 'chess_player' for chess-related tasks. "
                "If a task involves downloading a file, use 'get_task_file_tool' first. "
                "If you have the final answer, use 'final_answer_tool'.\n\n"
                "**Available Tools:**\n"
                "- `get_task_file_tool(task_id: str, file_name: str)`: Downloads a file associated with a task.\n"
                "- `final_answer_tool(answer: str)`: Use this when you have the exact final answer.\n\n"
                "**Managed Agents:**\n"
                "- `researcher(query: str)`: Use for questions requiring web search, video analysis, or audio analysis.\n"
                "- `chess_player(query: str)`: Use for questions related to chess positions or moves.\n\n"
                "Think step-by-step. If a task involves a file, use `get_task_file_tool` first to download it, then pass the file path to the appropriate sub-agent or tool."
            ),
            tools=[
                GetTaskFileTool(settings),
                FinalAnswerTool(),
                # The manager handles file paths, so it owns the Excel parser.
                ExcelParsingTool(settings),
            ],
            # Model reached through the OpenAI-compatible Gemini endpoint.
            model=OpenAIServerModel(
                model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW,
                api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
                api_key=settings.gemini_api_key.get_secret_value(),
                temperature=0.0,
                timeout=180,
            ),
            managed_agents=[self.researcher, self.chess_player],
            verbosity_level=2,
            max_steps=20,
        )
        logger.info("ManagerAgent initialized.")

    @staticmethod
    def _build_prompt(question_data: dict) -> str:
        """Return the question text enriched with task id and format rules.

        Reads the ``"question"``, ``"task_id"`` and ``"file_name"`` keys of
        *question_data* (each optional). When a file name is present, a hint
        on how to fetch and process the file is appended.
        """
        task_id = question_data.get("task_id", "N/A")
        question_text = question_data.get("question", "")
        file_name = question_data.get("file_name", "")
        prompt = (
            f"{question_text} "
            f"task_id: {task_id}. "
            "Your final answer should be a number or as few words as possible. "
            "Only use abbreviations when the question calls for abbreviations. "
            "If needed, use a comma separated list of values; the comma is always followed by a space. "
            "Critically review your answer before making it the final answer. "
            "Double check the answer to make sure it meets all format requirements stated in the question. "
        )
        if file_name:
            prompt = (
                f"{prompt} file_name: {file_name} "
                "(use get_task_file_tool to fetch this file and then pass its path "
                "to the relevant tool/agent, or excel_parsing_tool if it's an Excel file)."
            )
        return prompt

    def __call__(self, question_data: dict) -> str:
        """Run the manager agent on one question and return its answer.

        Args:
            question_data: Mapping with optional ``"task_id"``,
                ``"question"`` and ``"file_name"`` entries.

        Returns:
            The agent's final answer converted to ``str``, or a string of the
            form ``"AGENT ERROR: <message>"`` if running the agent raised.
        """
        task_id = question_data.get("task_id", "N/A")
        enriched_question = self._build_prompt(question_data)
        logger.info(
            "ManagerAgent received question (first 100 chars): %s...",
            enriched_question[:100],
        )
        try:
            # The agent may hand back a non-string final answer (e.g. a
            # number); coerce it so the declared ``-> str`` contract holds.
            final_answer = str(self.agent.run(enriched_question))
            logger.info("ManagerAgent returning final answer: %s", final_answer)
            return final_answer
        except Exception as e:
            # Top-level boundary: report the failure as the answer instead of
            # crashing, but keep the traceback in the log for debugging.
            logger.exception("Error running ManagerAgent on task %s: %s", task_id, e)
            return f"AGENT ERROR: {e}"
|