hassenhamdi committed on
Commit
802dfde
·
verified ·
1 Parent(s): 9ef9270

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +138 -149
agent.py CHANGED
@@ -1,157 +1,146 @@
1
- # agent.py
2
- import contextlib
3
- import io
4
- import logging
5
  import os
6
- logger = logging.getLogger(__name__)
 
 
 
 
7
 
8
- from models import GoogleModelID # Import GoogleModelID
9
  from settings import Settings
10
- from smolagents import OpenAIServerModel, CodeAgent, FinalAnswerTool # Changed from LiteLLMModel
11
- from smolagents import DuckDuckGoSearchTool, VisitWebpageTool # Changed from GoogleSearchTool
12
- from smolagents.local_python_executor import BASE_PYTHON_TOOLS
13
- from tools import GetTaskFileTool, VideoUnderstandingTool, AudioUnderstandingTool
14
- from tools import ChessBoardFENTool, BestChessMoveTool, ConvertChessMoveTool, ExcelParsingTool
15
- import json # Added for BASE_PYTHON_TOOLS
16
- import pandas as pd # Added for BASE_PYTHON_TOOLS
17
-
18
-
19
# Extend BASE_PYTHON_TOOLS for the PythonInterpreterTool to have access to these.
# This mutates the dict object imported from smolagents.local_python_executor
# as a module-level side effect, i.e. it happens as soon as this file is imported.
# SECURITY NOTE(review): `open`, `os` and `exec` together hand file-system access
# and arbitrary code execution to whatever code is evaluated with these tools
# (presumably LLM-generated code — confirm against the executor). Verify this is
# acceptable for the deployment before relying on it outside a sandbox.
BASE_PYTHON_TOOLS["open"] = open
BASE_PYTHON_TOOLS["os"] = os
BASE_PYTHON_TOOLS["io"] = io
BASE_PYTHON_TOOLS["contextlib"] = contextlib
BASE_PYTHON_TOOLS["exec"] = exec  # Note: exec is powerful, use with caution in production
BASE_PYTHON_TOOLS["json"] = json  # For parsing JSON if needed by agent
BASE_PYTHON_TOOLS["pd"] = pd  # For pandas operations if needed by agent
27
-
28
class ResearchAgent:
    """Builds the ``researcher`` CodeAgent and exposes it as ``self.agent``.

    The agent is equipped with a DuckDuckGo search tool, a webpage visiting
    tool, and video/audio understanding tools, and is driven by a Gemini
    model reached through the OpenAI-compatible endpoint.
    """

    def __init__(self, settings: Settings):
        """Create the underlying CodeAgent.

        Args:
            settings: Application settings; supplies the Gemini API key and is
                forwarded to the video and audio understanding tools.
        """
        # Tools are instantiated first and the model last, matching the order
        # in which the original keyword arguments were evaluated.
        research_tools = [
            DuckDuckGoSearchTool(),
            VisitWebpageTool(max_output_length=100000),
            # The multimodal tools are pinned to Gemini 2.0 Flash.
            VideoUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH),
            AudioUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH),
        ]
        allowed_imports = [
            "unicodedata", "stat", "datetime", "random", "pandas", "itertools",
            "math", "statistics", "queue", "time", "collections", "re", "os",
            "json", "io", "urllib.parse",
        ]
        llm = OpenAIServerModel(
            model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW,
            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=settings.gemini_api_key.get_secret_value(),
            temperature=0.1,
            timeout=180,
        )

        self.agent = CodeAgent(
            name="researcher",
            description=(
                "A specialized agent for web research, video analysis, and audio understanding. "
                "Give it your query as an argument. "
                "Use 'duckduckgo_search_tool' for web searches, "
                "'visit_webpage_tool' to read web page content, "
                "'video_understanding_tool' for YouTube videos, "
                "and 'audio_understanding_tool' for local audio files."
            ),
            add_base_tools=False,
            tools=research_tools,
            additional_authorized_imports=allowed_imports,
            max_steps=15,
            verbosity_level=2,
            model=llm,
        )
        logger.info("ResearchAgent initialized.")
56
-
57
class ChessAgent:
    """Builds the ``chess_player`` CodeAgent and exposes it as ``self.agent``.

    The agent is given a board-to-FEN tool, a best-move tool and a move
    conversion tool, and is driven by a Gemini model reached through the
    OpenAI-compatible endpoint.
    """

    def __init__(self, settings: Settings):
        """Create the underlying CodeAgent.

        Args:
            settings: Application settings; supplies the Gemini API key and is
                forwarded to the best-move and move-conversion tools.
        """
        # Tools are instantiated first and the model last, matching the order
        # in which the original keyword arguments were evaluated.
        chess_tools = [
            ChessBoardFENTool(),
            BestChessMoveTool(settings),
            ConvertChessMoveTool(settings, GoogleModelID.GEMINI_2_5_FLASH_PREVIEW),
        ]
        allowed_imports = [
            "unicodedata", "stat", "datetime", "random", "pandas", "itertools",
            "math", "statistics", "queue", "time", "collections", "re", "os",
            "json", "urllib.parse",
        ]
        llm = OpenAIServerModel(
            model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW,
            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=settings.gemini_api_key.get_secret_value(),
            temperature=0.0,
            timeout=180,
        )

        self.agent = CodeAgent(
            name="chess_player",
            description=(
                "Makes a chess move. "
                "Give it a query including board image filepath and player turn (black or white)."
            ),
            add_base_tools=False,
            tools=chess_tools,
            additional_authorized_imports=allowed_imports,
            max_steps=10,
            verbosity_level=2,
            model=llm,
        )
        logger.info("ChessAgent initialized.")
84
-
85
class ManagerAgent:
    """
    The main orchestrating agent that routes questions to specialized sub-agents
    or handles them directly with its own tools.

    Instances are callable: ``manager(question_data)`` builds an enriched
    prompt from the question dict and runs the manager CodeAgent on it.
    """

    def __init__(self, settings: Settings):
        """Build the sub-agents and the manager CodeAgent.

        Args:
            settings: Application settings; supplies the Gemini API key and is
                forwarded to the sub-agents and to the manager's own tools.
        """
        self.settings = settings
        self.researcher = ResearchAgent(settings).agent
        self.chess_player = ChessAgent(settings).agent

        # Main manager agent
        self.agent = CodeAgent(
            name="manager",
            description=(
                "You are a highly capable AI assistant designed to solve complex GAIA benchmark questions. "
                "Your primary role is to route tasks to the most appropriate specialized agent: "
                "'researcher' for general knowledge, web browsing, video, and audio understanding tasks, "
                "or 'chess_player' for chess-related tasks. "
                "If a task involves downloading a file, use 'get_task_file_tool' first. "
                "If you have the final answer, use 'final_answer_tool'.\n\n"
                "**Available Tools:**\n"
                "- `get_task_file_tool(task_id: str, file_name: str)`: Downloads a file associated with a task.\n"
                "- `final_answer_tool(answer: str)`: Use this when you have the exact final answer.\n\n"
                "**Managed Agents:**\n"
                "- `researcher(query: str)`: Use for questions requiring web search, video analysis, or audio analysis.\n"
                "- `chess_player(query: str)`: Use for questions related to chess positions or moves.\n\n"
                "Think step-by-step. If a task involves a file, use `get_task_file_tool` first to download it, then pass the file path to the appropriate sub-agent or tool."
            ),
            tools=[
                GetTaskFileTool(settings),
                FinalAnswerTool(),
                ExcelParsingTool(settings),  # The manager handles file paths, so it owns Excel parsing
            ],
            model=OpenAIServerModel(
                model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW,
                api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
                api_key=settings.gemini_api_key.get_secret_value(),
                temperature=0.0,
                timeout=180,
            ),
            managed_agents=[self.researcher, self.chess_player],
            verbosity_level=2,
            max_steps=20,
        )
        logger.info("ManagerAgent initialized.")

    def __call__(self, question_data: dict) -> str:
        """Run the manager agent on one question.

        Args:
            question_data: Dict read with ``.get`` for the keys ``"task_id"``
                (default ``"N/A"``), ``"question"`` (default ``""``) and
                ``"file_name"`` (default ``""``).

        Returns:
            Whatever ``self.agent.run`` returns for the enriched prompt, or the
            string ``"AGENT ERROR: <exception>"`` if the run raises.
        """
        task_id = question_data.get("task_id", "N/A")
        question_text = question_data.get("question", "")
        file_name = question_data.get("file_name", "")

        # Append the answer-format instructions to every question.
        enriched_question = (
            f"{question_text} "
            f"task_id: {task_id}. "
            f"Your final answer should be a number or as few words as possible. "
            f"Only use abbreviations when the question calls for abbreviations. "
            f"If needed, use a comma separated list of values; the comma is always followed by a space. "
            f"Critically review your answer before making it the final answer. "
            f"Double check the answer to make sure it meets all format requirements stated in the question. "
        )
        if file_name:
            enriched_question = f"{enriched_question} file_name: {file_name} (use get_task_file_tool to fetch this file and then pass its path to the relevant tool/agent, or excel_parsing_tool if it's an Excel file)."

        logger.info(f"ManagerAgent received question (first 100 chars): {enriched_question[:100]}...")

        try:
            final_answer = self.agent.run(enriched_question)
            logger.info(f"ManagerAgent returning final answer: {final_answer}")
            return final_answer
        except Exception as e:
            # Deliberate best-effort boundary: the failure is turned into an
            # answer string. logger.exception keeps the traceback, which the
            # previous logger.error call discarded.
            logger.exception("Error running ManagerAgent on task %s: %s", task_id, e)
            return f"AGENT ERROR: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
 
 
 
 
 
 
 
1
  import os
2
+ import pandas as pd
3
+ import gradio as gr
4
+ import logging
5
+ import time
6
+
7
 
8
+ # Import the new Settings, Evaluator, and Runner classes
9
  from settings import Settings
10
+ from evaluator import Evaluator
11
+ from runner import Runner
12
+
13
# Logging: INFO level for the root logger; force=True replaces any handlers
# that were installed before this module ran.
logging.basicConfig(level=logging.INFO, force=True)
logger = logging.getLogger(__name__)

# Shared application objects, created once at import time. The evaluator is
# constructed before the runner, both from the same Settings instance.
settings = Settings()
evaluator, runner = Evaluator(settings), Runner(settings)

# Values returned by the UI callbacks when there is nothing to show.
LOGIN_MESSAGE = "Please Login to Hugging Face with the button."
EMPTY_RESULTS_TABLE = pd.DataFrame(columns=["task_id", "question", "answer"])
26
+
27
+ def _format_elapsed_time(elapsed_time):
28
+ """Formats elapsed time into minutes and seconds."""
29
+ minutes = int(elapsed_time // 60)
30
+ seconds = elapsed_time % 60
31
+ if minutes > 0:
32
+ return f"Elapsed time: {minutes} minutes {seconds:.2f} seconds"
33
+ else:
34
+ return f"Elapsed time: {seconds:.2f} seconds"
35
+
36
def _run_agent_on_questions(questions_list: list, username: str) -> tuple[str, pd.DataFrame]:
    """
    Helper function to run the agent on a list of questions and return status and results.

    Args:
        questions_list: Questions to pass to ``runner.run_agent``.
        username: Username passed to the runner and included in the log line.

    Returns:
        A ``(message, results)`` pair: a completion message that includes the
        elapsed time, and the value returned by ``runner.run_agent``.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) when the system wall clock is adjusted mid-run, which
    # time.time() does not guarantee.
    start_time = time.perf_counter()
    logger.info(f"Starting agent run for user: {username} on {len(questions_list)} questions.")

    # The runner handles the agent execution and saving of answers
    question_answer_pairs_df = runner.run_agent(questions_list, username)

    elapsed_time_str = _format_elapsed_time(time.perf_counter() - start_time)
    message = f"Agent run complete. {elapsed_time_str}"
    logger.info(message)
    return message, question_answer_pairs_df
51
+
52
def run_one(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Runs the agent on one question obtained from ``evaluator.get_one_question``.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results_table)`` pair. When not logged in, or
        when fetching the question or running the agent fails, the table is
        ``EMPTY_RESULTS_TABLE`` and the message explains why.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE

    # Fetching and running are guarded separately so that an agent failure is
    # not reported to the user as a failure to get the question.
    try:
        question = evaluator.get_one_question()
    except Exception as e:
        logger.exception("Error getting one question: %s", e)
        return f"Error getting question: {e}", EMPTY_RESULTS_TABLE

    try:
        return _run_agent_on_questions([question], profile.username)
    except Exception as e:
        logger.exception("Error running agent on one question: %s", e)
        return f"Error running agent: {e}", EMPTY_RESULTS_TABLE
63
+
64
def run_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Runs the agent on all questions returned by ``evaluator.get_questions``.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results_table)`` pair. When not logged in, or
        when fetching the questions or running the agent fails, the table is
        ``EMPTY_RESULTS_TABLE`` and the message explains why.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE

    # Fetching and running are guarded separately so that an agent failure is
    # not reported to the user as a failure to get the questions.
    try:
        questions = evaluator.get_questions()
    except Exception as e:
        logger.exception("Error getting all questions: %s", e)
        return f"Error getting questions: {e}", EMPTY_RESULTS_TABLE

    try:
        return _run_agent_on_questions(questions, profile.username)
    except Exception as e:
        logger.exception("Error running agent on all questions: %s", e)
        return f"Error running agent: {e}", EMPTY_RESULTS_TABLE
75
+
76
def submit(profile: gr.OAuthProfile | None) -> str:
    """Submit the answers stored for the logged-in user for evaluation.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        The value returned by ``evaluator.submit_answers`` for the user's
        username, or ``LOGIN_MESSAGE`` when there is no profile.
    """
    if not profile:
        return LOGIN_MESSAGE
    return evaluator.submit_answers(profile.username)
82
+
83
+
84
+ # --- Build Gradio Interface using Blocks ---
85
with gr.Blocks() as demo:
    # Components are created top to bottom in the same order as before:
    # title, instructions, login button, three action buttons, then outputs.
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Get One Answer' to run the agent on a random question or 'Get All Answers' to run all.
        3. Click 'Submit Answers' to submit answers for evaluation. **Your HF username will be submitted for leaderboard tracking.**
        ---
        **Disclaimers:**
        * Running 'Get All Answers' can take significant time as the agent processes all 20 questions.
        * Agent logs are detailed (DEBUG level) and may appear interleaved due to parallel execution.
        * The 'Submit Answers' button uses the most recent agent answers cached locally for your username.
        * **API Keys Required:** Ensure `GEMINI_API_KEY` is set as a Space Secret (or environment variable if running locally).
        """
    )

    gr.LoginButton()

    run_one_button = gr.Button("Get One Answer")
    run_all_button = gr.Button("Get All Answers")
    submit_button = gr.Button("Submit Answers")

    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # Both run buttons fill the status box and the results table; they are
    # registered in the same order as before (run_one, then run_all).
    for _button, _handler in ((run_one_button, run_one), (run_all_button, run_all)):
        _button.click(fn=_handler, outputs=[status_output, results_table])

    # Submitting only updates the status box.
    submit_button.click(fn=submit, outputs=[status_output])
122
+
123
if __name__ == "__main__":
    # Script entry point: log an informational startup banner, then launch
    # the Gradio app defined above.
    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)

    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST is documented as the Space's full host name, which already
        # ends in ".hf.space". Only append the suffix when it is missing, so
        # the logged URL is not "https://<host>.hf.space.hf.space".
        runtime_host = space_host_startup
        if not runtime_host.endswith(".hf.space"):
            runtime_host = f"{runtime_host}.hf.space"
        logger.info(f" Runtime URL should be: https://{runtime_host}")
    else:
        logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
        logger.info(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        logger.info(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        logger.info("ℹ️ SPACE_ID environment variable not found. Repo URL cannot be determined.")

    # Closing rule is as wide as the opening banner (30 + title + 30 dashes).
    logger.info("-"*(60 + len(" App Starting ")) + "\n")

    logger.info("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)