Agent_Final_Assignment

Runtime error

App Files Files Community

hassenhamdi committed on Jun 30

Commit

1939b22

verified ·

1 Parent(s): 802dfde

Update agent.py

Browse files

Files changed (1) hide show

agent.py +149 -139

agent.py CHANGED Viewed

@@ -1,146 +1,156 @@
-import os
-import pandas as pd
-import gradio as gr
 import logging
-import time
-# Import the new Settings, Evaluator, and Runner classes
-from settings import Settings
-from evaluator import Evaluator
-from runner import Runner
-# Configure logging
-logging.basicConfig(level=logging.INFO, force=True)
 logger = logging.getLogger(__name__)
-# Initialize settings, evaluator, and runner
-settings = Settings()
-evaluator = Evaluator(settings)
-runner = Runner(settings)
-LOGIN_MESSAGE = "Please Login to Hugging Face with the button."
-EMPTY_RESULTS_TABLE = pd.DataFrame(columns=['task_id', 'question', 'answer'])
-def _format_elapsed_time(elapsed_time):
-    """Formats elapsed time into minutes and seconds."""
-    minutes = int(elapsed_time // 60)
-    seconds = elapsed_time % 60
-    if minutes > 0:
-        return f"Elapsed time: {minutes} minutes {seconds:.2f} seconds"
-    else:
-        return f"Elapsed time: {seconds:.2f} seconds"
-def _run_agent_on_questions(questions_list: list, username: str) -> tuple[str, pd.DataFrame]:
     """
-    Helper function to run the agent on a list of questions and return status and results.
     """
-    start_time = time.time()
-    logger.info(f"Starting agent run for user: {username} on {len(questions_list)} questions.")
-    # The runner handles the agent execution and saving of answers
-    question_answer_pairs_df = runner.run_agent(questions_list, username)
-    end_time = time.time()
-    elapsed_time_str = _format_elapsed_time(end_time - start_time)
-    message = f"Agent run complete. {elapsed_time_str}"
-    logger.info(message)
-    return message, question_answer_pairs_df
-def run_one(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
-    """Runs the agent on one random question."""
-    if profile:
-        try:
-            question = evaluator.get_one_question()
-            return _run_agent_on_questions([question], profile.username)
-        except Exception as e:
-            logger.error(f"Error getting one question: {e}")
-            return f"Error getting question: {e}", EMPTY_RESULTS_TABLE
-    else:
-        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
-def run_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
-    """Runs the agent on all questions."""
-    if profile:
         try:
-            questions = evaluator.get_questions()
-            return _run_agent_on_questions(questions, profile.username)
         except Exception as e:
-            logger.error(f"Error getting all questions: {e}")
-            return f"Error getting questions: {e}", EMPTY_RESULTS_TABLE
-    else:
-        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
-def submit(profile: gr.OAuthProfile | None) -> str:
-    """Submits cached answers for evaluation."""
-    if profile:
-        return evaluator.submit_answers(profile.username)
-    else:
-        return LOGIN_MESSAGE
-# --- Build Gradio Interface using Blocks ---
-with gr.Blocks() as demo:
-    gr.Markdown("# GAIA Agent Evaluation Runner")
-    gr.Markdown(
-        """
-        **Instructions:**
-        1.  Log in to your Hugging Face account using the button below.
-        2.  Click 'Get One Answer' to run the agent on a random question or 'Get All Answers' to run all.
-        3.  Click 'Submit Answers' to submit answers for evaluation. **Your HF username will be submitted for leaderboard tracking.**
-        ---
-        **Disclaimers:**
-        * Running 'Get All Answers' can take significant time as the agent processes all 20 questions.
-        * Agent logs are detailed (DEBUG level) and may appear interleaved due to parallel execution.
-        * The 'Submit Answers' button uses the most recent agent answers cached locally for your username.
-        * **API Keys Required:** Ensure `GEMINI_API_KEY` is set as a Space Secret (or environment variable if running locally).
-        """
-    )
-    gr.LoginButton()
-    run_one_button = gr.Button("Get One Answer")
-    run_all_button = gr.Button("Get All Answers")
-    submit_button = gr.Button("Submit Answers")
-    status_output = gr.Textbox(
-        label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(
-        label="Questions and Agent Answers", wrap=True)
-    run_one_button.click(
-        fn=run_one, outputs=[status_output, results_table]
-    )
-    run_all_button.click(
-        fn=run_all, outputs=[status_output, results_table]
-    )
-    submit_button.click(
-        fn=submit, outputs=[status_output]
-    )
-if __name__ == "__main__":
-    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")
-    if space_host_startup:
-        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
-        logger.info(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        logger.info("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup:
-        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
-        logger.info(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        logger.info(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-    else:
-        logger.info("ℹ️  SPACE_ID environment variable not found. Repo URL cannot be determined.")
-    logger.info("-"*(60 + len(" App Starting ")) + "\n")
-    logger.info("Launching Gradio Interface for GAIA Agent Evaluation...")
-    demo.launch(debug=True, share=False)

+# agent.py
+import contextlib
+import io
 import logging
+import os
 logger = logging.getLogger(__name__)
+from models import GoogleModelID # Import GoogleModelID
+from settings import Settings
+from smolagents import OpenAIServerModel, CodeAgent, FinalAnswerTool # Changed from LiteLLMModel
+from smolagents import DuckDuckGoSearchTool, VisitWebpageTool # Changed from GoogleSearchTool
+from smolagents.local_python_executor import BASE_PYTHON_TOOLS
+from tools import GetTaskFileTool, VideoUnderstandingTool, AudioUnderstandingTool
+from tools import ChessBoardFENTool, BestChessMoveTool, ConvertChessMoveTool, ExcelParsingTool
+import json # Added for BASE_PYTHON_TOOLS
+import pandas as pd # Added for BASE_PYTHON_TOOLS
+# Import-time side effect: adds extra names to the BASE_PYTHON_TOOLS mapping imported
+# from smolagents.local_python_executor, so that (per the original author's intent)
+# the PythonInterpreterTool can reach them.
+# SECURITY NOTE(review): "exec", "open" and "os" are registered here; exec in particular
+# allows arbitrary code execution, so confirm this exposure is acceptable before production use.
+BASE_PYTHON_TOOLS.update(
+    {
+        "open": open,
+        "os": os,
+        "io": io,
+        "contextlib": contextlib,
+        "exec": exec,
+        "json": json,  # JSON parsing, if needed by the agent
+        "pd": pd,  # pandas operations, if needed by the agent
+    }
+)
+class ResearchAgent:
+    """Builds the ``researcher`` CodeAgent and exposes it as ``self.agent``.
+
+    The agent is equipped with a DuckDuckGo search tool, a webpage visitor,
+    and the project's video and audio understanding tools.
+    """
+    def __init__(self, settings: Settings):
+        """Configure the research agent.
+
+        Args:
+            settings: Project settings; provides the Gemini API key and is
+                forwarded to the video/audio tools.
+        """
+        # Tools are instantiated first, in the same order they are listed to the agent.
+        research_tools = [
+            DuckDuckGoSearchTool(),
+            VisitWebpageTool(max_output_length=100000),
+            # The multimodal tools are pinned to the 2.0 Flash model id.
+            VideoUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH),
+            AudioUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH),
+        ]
+        # Modules the agent-generated code is allowed to import.
+        allowed_imports = [
+            "unicodedata", "stat", "datetime", "random", "pandas", "itertools",
+            "math", "statistics", "queue", "time", "collections", "re", "os",
+            "json", "io", "urllib.parse",
+        ]
+        # Gemini is reached through its OpenAI-compatible endpoint.
+        llm = OpenAIServerModel(
+            model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW,
+            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
+            api_key=settings.gemini_api_key.get_secret_value(),
+            temperature=0.1,
+            timeout=180,
+        )
+        self.agent = CodeAgent(
+            name="researcher",
+            description="A specialized agent for web research, video analysis, and audio understanding. Give it your query as an argument. Use 'duckduckgo_search_tool' for web searches, 'visit_webpage_tool' to read web page content, 'video_understanding_tool' for YouTube videos, and 'audio_understanding_tool' for local audio files.",
+            add_base_tools=False,
+            tools=research_tools,
+            additional_authorized_imports=allowed_imports,
+            max_steps=15,
+            verbosity_level=2,
+            model=llm,
+        )
+        logger.info("ResearchAgent initialized.")
+class ChessAgent:
+    """Builds the ``chess_player`` CodeAgent and exposes it as ``self.agent``.
+
+    The agent is equipped with the project's board-to-FEN, best-move and
+    move-conversion tools.
+    """
+    def __init__(self, settings: Settings):
+        """Configure the chess agent.
+
+        Args:
+            settings: Project settings; provides the Gemini API key and is
+                forwarded to the best-move and move-conversion tools.
+        """
+        # Tools are instantiated first, in the same order they are listed to the agent.
+        chess_tools = [
+            ChessBoardFENTool(),
+            BestChessMoveTool(settings),
+            ConvertChessMoveTool(settings, GoogleModelID.GEMINI_2_5_FLASH_PREVIEW),
+        ]
+        # Modules the agent-generated code is allowed to import.
+        allowed_imports = [
+            "unicodedata", "stat", "datetime", "random", "pandas", "itertools",
+            "math", "statistics", "queue", "time", "collections", "re", "os",
+            "json", "urllib.parse",
+        ]
+        # Gemini is reached through its OpenAI-compatible endpoint; temperature 0.0 here.
+        llm = OpenAIServerModel(
+            model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW,
+            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
+            api_key=settings.gemini_api_key.get_secret_value(),
+            temperature=0.0,
+            timeout=180,
+        )
+        self.agent = CodeAgent(
+            name="chess_player",
+            description="Makes a chess move. Give it a query including board image filepath and player turn (black or white).",
+            add_base_tools=False,
+            tools=chess_tools,
+            additional_authorized_imports=allowed_imports,
+            max_steps=10,
+            verbosity_level=2,
+            model=llm,
+        )
+        logger.info("ChessAgent initialized.")
+class ManagerAgent:
     """
+    The main orchestrating agent that routes questions to specialized sub-agents
+    or handles them directly with its own tools.
+
+    The manager owns a ``researcher`` and a ``chess_player`` sub-agent (passed as
+    ``managed_agents``) plus file-download, Excel-parsing and final-answer tools.
+    Instances are callable: pass a question dict to get the agent's answer.
     """
+    def __init__(self, settings: Settings):
+        """Build the sub-agents and the managing CodeAgent.
+
+        Args:
+            settings: Project settings; provides the Gemini API key and is
+                forwarded to the sub-agents and to the manager's own tools.
+        """
+        self.settings = settings
+        self.researcher = ResearchAgent(settings).agent
+        self.chess_player = ChessAgent(settings).agent
+        # Main manager agent
+        self.agent = CodeAgent(
+            name="manager",
+            description=(
+                "You are a highly capable AI assistant designed to solve complex GAIA benchmark questions. "
+                "Your primary role is to route tasks to the most appropriate specialized agent: "
+                "'researcher' for general knowledge, web browsing, video, and audio understanding tasks, "
+                "or 'chess_player' for chess-related tasks. "
+                "If a task involves downloading a file, use 'get_task_file_tool' first. "
+                "If you have the final answer, use 'final_answer_tool'.\n\n"
+                "**Available Tools:**\n"
+                "- `get_task_file_tool(task_id: str, file_name: str)`: Downloads a file associated with a task.\n"
+                "- `final_answer_tool(answer: str)`: Use this when you have the exact final answer.\n\n"
+                "**Managed Agents:**\n"
+                "- `researcher(query: str)`: Use for questions requiring web search, video analysis, or audio analysis.\n"
+                "- `chess_player(query: str)`: Use for questions related to chess positions or moves.\n\n"
+                "Think step-by-step. If a task involves a file, use `get_task_file_tool` first to download it, then pass the file path to the appropriate sub-agent or tool."
+            ),
+            tools=[
+                GetTaskFileTool(settings),
+                FinalAnswerTool(),
+                ExcelParsingTool(settings) # Handles file paths, so it lives on the manager
+            ],
+            # Gemini is reached through its OpenAI-compatible endpoint.
+            model=OpenAIServerModel(
+                model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW,
+                api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
+                api_key = settings.gemini_api_key.get_secret_value(),
+                temperature=0.0,
+                timeout=180
+            ),
+            managed_agents=[self.researcher, self.chess_player],
+            verbosity_level=2,
+            max_steps=20
+        )
+        logger.info("ManagerAgent initialized.")
+    def __call__(self, question_data: dict) -> str:
+        """Run the manager agent on one question.
+
+        Args:
+            question_data: Mapping read with ``.get`` for the keys ``"task_id"``
+                (default ``"N/A"``), ``"question"`` (default ``""``) and
+                ``"file_name"`` (default ``""``).
+
+        Returns:
+            Whatever ``self.agent.run`` returns for the enriched prompt, or the
+            string ``"AGENT ERROR: <exception>"`` if the run raises.
+        """
+        task_id = question_data.get("task_id", "N/A")
+        question_text = question_data.get("question", "")
+        file_name = question_data.get("file_name", "")
+        # Append the task id and answer-format instructions to the raw question.
+        enriched_question = (
+            f"{question_text} "
+            f"task_id: {task_id}. "
+            f"Your final answer should be a number or as few words as possible. "
+            f"Only use abbreviations when the question calls for abbreviations. "
+            f"If needed, use a comma separated list of values; the comma is always followed by a space. "
+            f"Critically review your answer before making it the final answer. "
+            f"Double check the answer to make sure it meets all format requirements stated in the question. "
+        )
+        if file_name:
+            # Tell the agent which attachment to fetch and how to route it.
+            enriched_question = f"{enriched_question} file_name: {file_name} (use get_task_file_tool to fetch this file and then pass its path to the relevant tool/agent, or excel_parsing_tool if it's an Excel file)."
+        logger.info(f"ManagerAgent received question (first 100 chars): {enriched_question[:100]}...")
         try:
+            final_answer = self.agent.run(enriched_question)
+            logger.info(f"ManagerAgent returning final answer: {final_answer}")
+            return final_answer
         except Exception as e:
+            # Top-level boundary: never propagate, but keep the traceback in the log
+            # (logger.exception logs at ERROR level with exc_info attached).
+            logger.exception(f"Error running ManagerAgent on task {task_id}: {e}")
+            return f"AGENT ERROR: {e}"