hassenhamdi committed
Commit 1939b22 · verified · 1 Parent(s): 802dfde

Update agent.py

Files changed (1)
  1. agent.py +149 -139
agent.py CHANGED
@@ -1,146 +1,156 @@
- import os
- import pandas as pd
- import gradio as gr
  import logging
- import time
-
-
- # Import the new Settings, Evaluator, and Runner classes
- from settings import Settings
- from evaluator import Evaluator
- from runner import Runner
-
- # Configure logging
- logging.basicConfig(level=logging.INFO, force=True)
  logger = logging.getLogger(__name__)

- # Initialize settings, evaluator, and runner
- settings = Settings()
- evaluator = Evaluator(settings)
- runner = Runner(settings)
-
-
-
- LOGIN_MESSAGE = "Please Login to Hugging Face with the button."
- EMPTY_RESULTS_TABLE = pd.DataFrame(columns=['task_id', 'question', 'answer'])
-
- def _format_elapsed_time(elapsed_time):
-     """Formats elapsed time into minutes and seconds."""
-     minutes = int(elapsed_time // 60)
-     seconds = elapsed_time % 60
-     if minutes > 0:
-         return f"Elapsed time: {minutes} minutes {seconds:.2f} seconds"
-     else:
-         return f"Elapsed time: {seconds:.2f} seconds"
-
- def _run_agent_on_questions(questions_list: list, username: str) -> tuple[str, pd.DataFrame]:
      """
-     Helper function to run the agent on a list of questions and return status and results.
      """
-     start_time = time.time()
-     logger.info(f"Starting agent run for user: {username} on {len(questions_list)} questions.")
-
-     # The runner handles the agent execution and saving of answers
-     question_answer_pairs_df = runner.run_agent(questions_list, username)
-
-     end_time = time.time()
-     elapsed_time_str = _format_elapsed_time(end_time - start_time)
-     message = f"Agent run complete. {elapsed_time_str}"
-     logger.info(message)
-     return message, question_answer_pairs_df
-
- def run_one(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
-     """Runs the agent on one random question."""
-     if profile:
-         try:
-             question = evaluator.get_one_question()
-             return _run_agent_on_questions([question], profile.username)
-         except Exception as e:
-             logger.error(f"Error getting one question: {e}")
-             return f"Error getting question: {e}", EMPTY_RESULTS_TABLE
-     else:
-         return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
-
- def run_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
-     """Runs the agent on all questions."""
-     if profile:
          try:
-             questions = evaluator.get_questions()
-             return _run_agent_on_questions(questions, profile.username)
          except Exception as e:
-             logger.error(f"Error getting all questions: {e}")
-             return f"Error getting questions: {e}", EMPTY_RESULTS_TABLE
-     else:
-         return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
-
- def submit(profile: gr.OAuthProfile | None) -> str:
-     """Submits cached answers for evaluation."""
-     if profile:
-         return evaluator.submit_answers(profile.username)
-     else:
-         return LOGIN_MESSAGE
-
-
- # --- Build Gradio Interface using Blocks ---
- with gr.Blocks() as demo:
-     gr.Markdown("# GAIA Agent Evaluation Runner")
-     gr.Markdown(
-         """
-         **Instructions:**
-         1. Log in to your Hugging Face account using the button below.
-         2. Click 'Get One Answer' to run the agent on a random question or 'Get All Answers' to run all.
-         3. Click 'Submit Answers' to submit answers for evaluation. **Your HF username will be submitted for leaderboard tracking.**
-         ---
-         **Disclaimers:**
-         * Running 'Get All Answers' can take significant time as the agent processes all 20 questions.
-         * Agent logs are detailed (DEBUG level) and may appear interleaved due to parallel execution.
-         * The 'Submit Answers' button uses the most recent agent answers cached locally for your username.
-         * **API Keys Required:** Ensure `GEMINI_API_KEY` is set as a Space Secret (or environment variable if running locally).
-         """
-     )
-
-     gr.LoginButton()
-
-     run_one_button = gr.Button("Get One Answer")
-     run_all_button = gr.Button("Get All Answers")
-     submit_button = gr.Button("Submit Answers")
-
-     status_output = gr.Textbox(
-         label="Run Status / Submission Result", lines=5, interactive=False)
-     results_table = gr.DataFrame(
-         label="Questions and Agent Answers", wrap=True)
-
-     run_one_button.click(
-         fn=run_one, outputs=[status_output, results_table]
-     )
-     run_all_button.click(
-         fn=run_all, outputs=[status_output, results_table]
-     )
-     submit_button.click(
-         fn=submit, outputs=[status_output]
-     )
-
- if __name__ == "__main__":
-     logger.info("\n" + "-"*30 + " App Starting " + "-"*30)
-
-     # Check for SPACE_HOST and SPACE_ID at startup for information
-     space_host_startup = os.getenv("SPACE_HOST")
-     space_id_startup = os.getenv("SPACE_ID")
-
-     if space_host_startup:
-         logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
-         logger.info(f" Runtime URL should be: https://{space_host_startup}.hf.space")
-     else:
-         logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")
-
-     if space_id_startup:
-         logger.info(f"✅ SPACE_ID found: {space_id_startup}")
-         logger.info(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-         logger.info(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-     else:
-         logger.info("ℹ️ SPACE_ID environment variable not found. Repo URL cannot be determined.")
-
-     logger.info("-"*(60 + len(" App Starting ")) + "\n")
-
-     logger.info("Launching Gradio Interface for GAIA Agent Evaluation...")
-     demo.launch(debug=True, share=False)
+ # agent.py
+ import contextlib
+ import io
  import logging
+ import os
  logger = logging.getLogger(__name__)

+ from models import GoogleModelID # Import GoogleModelID
+ from settings import Settings
+ from smolagents import OpenAIServerModel, CodeAgent, FinalAnswerTool # Changed from LiteLLMModel
+ from smolagents import DuckDuckGoSearchTool, VisitWebpageTool # Changed from GoogleSearchTool
+ from smolagents.local_python_executor import BASE_PYTHON_TOOLS
+ from tools import GetTaskFileTool, VideoUnderstandingTool, AudioUnderstandingTool
+ from tools import ChessBoardFENTool, BestChessMoveTool, ConvertChessMoveTool, ExcelParsingTool
+ import json # Added for BASE_PYTHON_TOOLS
+ import pandas as pd # Added for BASE_PYTHON_TOOLS
+
+
+ # Extend BASE_PYTHON_TOOLS for the PythonInterpreterTool to have access to these
+ BASE_PYTHON_TOOLS["open"] = open
+ BASE_PYTHON_TOOLS["os"] = os
+ BASE_PYTHON_TOOLS["io"] = io
+ BASE_PYTHON_TOOLS["contextlib"] = contextlib
+ BASE_PYTHON_TOOLS["exec"] = exec # Note: exec is powerful, use with caution in production
+ BASE_PYTHON_TOOLS["json"] = json # For parsing JSON if needed by agent
+ BASE_PYTHON_TOOLS["pd"] = pd # For pandas operations if needed by agent
+
+ class ResearchAgent:
+     def __init__(self, settings: Settings):
+         self.agent = CodeAgent(
+             name="researcher",
+             description="A specialized agent for web research, video analysis, and audio understanding. Give it your query as an argument. Use 'duckduckgo_search_tool' for web searches, 'visit_webpage_tool' to read web page content, 'video_understanding_tool' for YouTube videos, and 'audio_understanding_tool' for local audio files.",
+             add_base_tools=False,
+             tools=[
+                 DuckDuckGoSearchTool(), # Changed from GoogleSearchTool
+                 VisitWebpageTool(max_output_length=100000),
+                 VideoUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH), # Still uses 2.0 Flash for specific multimodal tasks
+                 AudioUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH) # Still uses 2.0 Flash for specific multimodal tasks
+             ],
+             additional_authorized_imports=[
+                 "unicodedata", "stat", "datetime", "random", "pandas", "itertools",
+                 "math", "statistics", "queue", "time", "collections", "re", "os",
+                 "json", "io", "urllib.parse"
+             ],
+             max_steps=15,
+             verbosity_level=2,
+             model=OpenAIServerModel( # Changed to OpenAIServerModel
+                 model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW, # Set to GEMINI_2_5_FLASH_PREVIEW
+                 api_base="https://generativelanguage.googleapis.com/v1beta/openai/", # Gemini API base
+                 api_key = settings.gemini_api_key.get_secret_value(), # Use Gemini API key
+                 temperature=0.1,
+                 timeout=180
+             )
+         )
+         logger.info("ResearchAgent initialized.")
+
+ class ChessAgent:
+     def __init__(self, settings: Settings):
+         self.agent = CodeAgent(
+             name="chess_player",
+             description="Makes a chess move. Give it a query including board image filepath and player turn (black or white).",
+             add_base_tools=False,
+             tools=[
+                 ChessBoardFENTool(),
+                 BestChessMoveTool(settings),
+                 ConvertChessMoveTool(settings, GoogleModelID.GEMINI_2_5_FLASH_PREVIEW), # Changed to Gemini Flash Preview
+             ],
+             additional_authorized_imports=[
+                 "unicodedata", "stat", "datetime", "random", "pandas", "itertools",
+                 "math", "statistics", "queue", "time", "collections", "re", "os",
+                 "json", "urllib.parse"
+             ],
+             max_steps=10,
+             verbosity_level=2,
+             model=OpenAIServerModel( # Changed to OpenAIServerModel
+                 model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW, # Set to GEMINI_2_5_FLASH_PREVIEW
+                 api_base="https://generativelanguage.googleapis.com/v1beta/openai/", # Gemini API base
+                 api_key = settings.gemini_api_key.get_secret_value(), # Use Gemini API key
+                 temperature=0.0,
+                 timeout=180
+             )
+         )
+         logger.info("ChessAgent initialized.")
+
+ class ManagerAgent:
      """
+     The main orchestrating agent that routes questions to specialized sub-agents
+     or handles them directly with its own tools.
      """
+     def __init__(self, settings: Settings):
+         self.settings = settings
+         self.researcher = ResearchAgent(settings).agent
+         self.chess_player = ChessAgent(settings).agent
+
+         # Main manager agent
+         self.agent = CodeAgent(
+             name="manager",
+             description=(
+                 "You are a highly capable AI assistant designed to solve complex GAIA benchmark questions. "
+                 "Your primary role is to route tasks to the most appropriate specialized agent: "
+                 "'researcher' for general knowledge, web browsing, video, and audio understanding tasks, "
+                 "or 'chess_player' for chess-related tasks. "
+                 "If a task involves downloading a file, use 'get_task_file_tool' first. "
+                 "If you have the final answer, use 'final_answer_tool'.\n\n"
+                 "**Available Tools:**\n"
+                 "- `get_task_file_tool(task_id: str, file_name: str)`: Downloads a file associated with a task.\n"
+                 "- `final_answer_tool(answer: str)`: Use this when you have the exact final answer.\n\n"
+                 "**Managed Agents:**\n"
+                 "- `researcher(query: str)`: Use for questions requiring web search, video analysis, or audio analysis.\n"
+                 "- `chess_player(query: str)`: Use for questions related to chess positions or moves.\n\n"
+                 "Think step-by-step. If a task involves a file, use `get_task_file_tool` first to download it, then pass the file path to the appropriate sub-agent or tool."
+             ),
+             tools=[
+                 GetTaskFileTool(settings),
+                 FinalAnswerTool(),
+                 ExcelParsingTool(settings) # Added ExcelParsingTool to ManagerAgent as it handles file paths
+             ],
+             model=OpenAIServerModel( # Changed to OpenAIServerModel
+                 model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW, # Set to GEMINI_2_5_FLASH_PREVIEW
+                 api_base="https://generativelanguage.googleapis.com/v1beta/openai/", # Gemini API base
+                 api_key = settings.gemini_api_key.get_secret_value(), # Use Gemini API key
+                 temperature=0.0,
+                 timeout=180
+             ),
+             managed_agents=[self.researcher, self.chess_player],
+             verbosity_level=2,
+             max_steps=20
+         )
+         logger.info("ManagerAgent initialized.")
+
+     def __call__(self, question_data: dict) -> str:
+         task_id = question_data.get("task_id", "N/A")
+         question_text = question_data.get("question", "")
+         file_name = question_data.get("file_name", "")
+
+         enriched_question = (
+             f"{question_text} "
+             f"task_id: {task_id}. "
+             f"Your final answer should be a number or as few words as possible. "
+             f"Only use abbreviations when the question calls for abbreviations. "
+             f"If needed, use a comma separated list of values; the comma is always followed by a space. "
+             f"Critically review your answer before making it the final answer. "
+             f"Double check the answer to make sure it meets all format requirements stated in the question. "
+         )
+         if file_name:
+             enriched_question = f"{enriched_question} file_name: {file_name} (use get_task_file_tool to fetch this file and then pass its path to the relevant tool/agent, or excel_parsing_tool if it's an Excel file)." # Updated prompt for Excel
+
+         logger.info(f"ManagerAgent received question (first 100 chars): {enriched_question[:100]}...")
+
          try:
+             final_answer = self.agent.run(enriched_question)
+             logger.info(f"ManagerAgent returning final answer: {final_answer}")
+             return final_answer
          except Exception as e:
+             logger.error(f"Error running ManagerAgent on task {task_id}: {e}")
+             return f"AGENT ERROR: {e}"
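For context, here is a minimal usage sketch, not part of this commit, showing how the refactored ManagerAgent could be driven from a separate runner script now that the Gradio UI and runner logic have been removed from agent.py. It assumes Settings() loads GEMINI_API_KEY (e.g. from a Space secret) and that questions follow the {"task_id", "question", "file_name"} dict shape that ManagerAgent.__call__ expects; the hypothetical answer_questions helper is illustrative only, and the Space's actual runner/app code may differ.

# Hypothetical driver sketch -- not part of this commit.
import logging

from settings import Settings   # provided elsewhere in this repo
from agent import ManagerAgent  # the class added in this commit

logging.basicConfig(level=logging.INFO)

def answer_questions(questions: list[dict]) -> list[dict]:
    """Run the manager agent over a list of GAIA-style question dicts."""
    agent = ManagerAgent(Settings())
    results = []
    for q in questions:
        # ManagerAgent.__call__ returns a string, or "AGENT ERROR: ..." on failure
        answer = agent(q)
        results.append({"task_id": q.get("task_id"), "answer": answer})
    return results

if __name__ == "__main__":
    sample = [{"task_id": "demo-1", "question": "What is 2 + 2?", "file_name": ""}]
    print(answer_questions(sample))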