Agent_Final_Assignment

Runtime error

App Files Community

hassenhamdi committed on Jun 30

Commit

802dfde

verified ·

1 Parent(s): 9ef9270

Update agent.py

Browse files

Files changed (1) hide show

agent.py +138 -149

agent.py CHANGED Viewed

@@ -1,157 +1,146 @@
-# agent.py
-import contextlib
-import io
-import logging
 import os
-logger = logging.getLogger(__name__)
-from models import GoogleModelID # Import GoogleModelID
 from settings import Settings
-from smolagents import OpenAIServerModel, CodeAgent, FinalAnswerTool # Changed from LiteLLMModel
-from smolagents import DuckDuckGoSearchTool, VisitWebpageTool # Changed from GoogleSearchTool
-from smolagents.local_python_executor import BASE_PYTHON_TOOLS
-from tools import GetTaskFileTool, VideoUnderstandingTool, AudioUnderstandingTool
-from tools import ChessBoardFENTool, BestChessMoveTool, ConvertChessMoveTool, ExcelParsingTool
-import json # Added for BASE_PYTHON_TOOLS
-import pandas as pd # Added for BASE_PYTHON_TOOLS
-# Extend BASE_PYTHON_TOOLS for the PythonInterpreterTool to have access to these
-BASE_PYTHON_TOOLS["open"] = open
-BASE_PYTHON_TOOLS["os"] = os
-BASE_PYTHON_TOOLS["io"] = io
-BASE_PYTHON_TOOLS["contextlib"] = contextlib
-BASE_PYTHON_TOOLS["exec"] = exec # Note: exec is powerful, use with caution in production
-BASE_PYTHON_TOOLS["json"] = json # For parsing JSON if needed by agent
-BASE_PYTHON_TOOLS["pd"] = pd # For pandas operations if needed by agent
-class ResearchAgent:
-    def __init__(self, settings: Settings):
-        self.agent = CodeAgent(
-            name="researcher",
-            description="A specialized agent for web research, video analysis, and audio understanding. Give it your query as an argument. Use 'duckduckgo_search_tool' for web searches, 'visit_webpage_tool' to read web page content, 'video_understanding_tool' for YouTube videos, and 'audio_understanding_tool' for local audio files.",
-            add_base_tools=False,
-            tools=[
-                DuckDuckGoSearchTool(), # Changed from GoogleSearchTool
-                VisitWebpageTool(max_output_length=100000),
-                VideoUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH), # Still uses 2.0 Flash for specific multimodal tasks
-                AudioUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH) # Still uses 2.0 Flash for specific multimodal tasks
-            ],
-            additional_authorized_imports=[
-                "unicodedata", "stat", "datetime", "random", "pandas", "itertools",
-                "math", "statistics", "queue", "time", "collections", "re", "os",
-                "json", "io", "urllib.parse"
-            ],
-            max_steps=15,
-            verbosity_level=2,
-            model=OpenAIServerModel( # Changed to OpenAIServerModel
-                model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW, # Set to GEMINI_2_5_FLASH_PREVIEW
-                api_base="https://generativelanguage.googleapis.com/v1beta/openai/", # Gemini API base
-                api_key = settings.gemini_api_key.get_secret_value(), # Use Gemini API key
-                temperature=0.1,
-                timeout=180
-            )
-        )
-        logger.info("ResearchAgent initialized.")
-class ChessAgent:
-    def __init__(self, settings: Settings):
-        self.agent = CodeAgent(
-            name="chess_player",
-            description="Makes a chess move. Give it a query including board image filepath and player turn (black or white).",
-            add_base_tools=False,
-            tools=[
-                ChessBoardFENTool(),
-                BestChessMoveTool(settings),
-                ConvertChessMoveTool(settings, GoogleModelID.GEMINI_2_5_FLASH_PREVIEW), # Changed to Gemini Flash Preview
-            ],
-            additional_authorized_imports=[
-                "unicodedata", "stat", "datetime", "random", "pandas", "itertools",
-                "math", "statistics", "queue", "time", "collections", "re", "os",
-                "json", "urllib.parse"
-            ],
-            max_steps=10,
-            verbosity_level=2,
-            model=OpenAIServerModel( # Changed to OpenAIServerModel
-                model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW, # Set to GEMINI_2_5_FLASH_PREVIEW
-                api_base="https://generativelanguage.googleapis.com/v1beta/openai/", # Gemini API base
-                api_key = settings.gemini_api_key.get_secret_value(), # Use Gemini API key
-                temperature=0.0,
-                timeout=180
-            )
-        )
-        logger.info("ChessAgent initialized.")
-class ManagerAgent:
     """
-    The main orchestrating agent that routes questions to specialized sub-agents
-    or handles them directly with its own tools.
     """
-    def __init__(self, settings: Settings):
-        self.settings = settings
-        self.researcher = ResearchAgent(settings).agent
-        self.chess_player = ChessAgent(settings).agent
-        # Main manager agent
-        self.agent = CodeAgent(
-            name="manager",
-            description=(
-                "You are a highly capable AI assistant designed to solve complex GAIA benchmark questions. "
-                "Your primary role is to route tasks to the most appropriate specialized agent: "
-                "'researcher' for general knowledge, web browsing, video, and audio understanding tasks, "
-                "or 'chess_player' for chess-related tasks. "
-                "If a task involves downloading a file, use 'get_task_file_tool' first. "
-                "If you have the final answer, use 'final_answer_tool'.\n\n"
-                "**Available Tools:**\n"
-                "- `get_task_file_tool(task_id: str, file_name: str)`: Downloads a file associated with a task.\n"
-                "- `final_answer_tool(answer: str)`: Use this when you have the exact final answer.\n\n"
-                "**Managed Agents:**\n"
-                "- `researcher(query: str)`: Use for questions requiring web search, video analysis, or audio analysis.\n"
-                "- `chess_player(query: str)`: Use for questions related to chess positions or moves.\n\n"
-                "Think step-by-step. If a task involves a file, use `get_task_file_tool` first to download it, then pass the file path to the appropriate sub-agent or tool."
-            ),
-            tools=[
-                GetTaskFileTool(settings),
-                FinalAnswerTool(),
-                ExcelParsingTool(settings) # Added ExcelParsingTool to ManagerAgent as it handles file paths
-            ],
-            model=OpenAIServerModel( # Changed to OpenAIServerModel
-                model_id=GoogleModelID.GEMINI_2_5_FLASH_PREVIEW, # Set to GEMINI_2_5_FLASH_PREVIEW
-                api_base="https://generativelanguage.googleapis.com/v1beta/openai/", # Gemini API base
-                api_key = settings.gemini_api_key.get_secret_value(), # Use Gemini API key
-                temperature=0.0,
-                timeout=180
-            ),
-            managed_agents=[self.researcher, self.chess_player],
-            verbosity_level=2,
-            max_steps=20
-        )
-        logger.info("ManagerAgent initialized.")
-    def __call__(self, question_data: dict) -> str:
-        task_id = question_data.get("task_id", "N/A")
-        question_text = question_data.get("question", "")
-        file_name = question_data.get("file_name", "")
-        enriched_question = (
-            f"{question_text} "
-            f"task_id: {task_id}. "
-            f"Your final answer should be a number or as few words as possible. "
-            f"Only use abbreviations when the question calls for abbreviations. "
-            f"If needed, use a comma separated list of values; the comma is always followed by a space. "
-            f"Critically review your answer before making it the final answer. "
-            f"Double check the answer to make sure it meets all format requirements stated in the question. "
-        )
-        if file_name:
-            enriched_question = f"{enriched_question} file_name: {file_name} (use get_task_file_tool to fetch this file and then pass its path to the relevant tool/agent, or excel_parsing_tool if it's an Excel file)." # Updated prompt for Excel
-        logger.info(f"ManagerAgent received question (first 100 chars): {enriched_question[:100]}...")
         try:
-            final_answer = self.agent.run(enriched_question)
-            logger.info(f"ManagerAgent returning final answer: {final_answer}")
-            return final_answer
         except Exception as e:
-            logger.error(f"Error running ManagerAgent on task {task_id}: {e}")
-            return f"AGENT ERROR: {e}"

 import os
+import pandas as pd
+import gradio as gr
+import logging
+import time
+# Import the new Settings, Evaluator, and Runner classes
 from settings import Settings
+from evaluator import Evaluator
+from runner import Runner
+# Configure logging
+logging.basicConfig(level=logging.INFO, force=True)
+logger = logging.getLogger(__name__)
+# Initialize settings, evaluator, and runner
+settings = Settings()
+evaluator = Evaluator(settings)
+runner = Runner(settings)
+LOGIN_MESSAGE = "Please Login to Hugging Face with the button."
+EMPTY_RESULTS_TABLE = pd.DataFrame(columns=['task_id', 'question', 'answer'])
+def _format_elapsed_time(elapsed_time):
+    """Formats elapsed time into minutes and seconds."""
+    minutes = int(elapsed_time // 60)
+    seconds = elapsed_time % 60
+    if minutes > 0:
+        return f"Elapsed time: {minutes} minutes {seconds:.2f} seconds"
+    else:
+        return f"Elapsed time: {seconds:.2f} seconds"
+def _run_agent_on_questions(questions_list: list, username: str) -> tuple[str, pd.DataFrame]:
     """
+    Helper function to run the agent on a list of questions and return status and results.
     """
+    start_time = time.time()
+    logger.info(f"Starting agent run for user: {username} on {len(questions_list)} questions.")
+    # The runner handles the agent execution and saving of answers
+    question_answer_pairs_df = runner.run_agent(questions_list, username)
+    end_time = time.time()
+    elapsed_time_str = _format_elapsed_time(end_time - start_time)
+    message = f"Agent run complete. {elapsed_time_str}"
+    logger.info(message)
+    return message, question_answer_pairs_df
+def run_one(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
+    """Runs the agent on one random question."""
+    if profile:
         try:
+            question = evaluator.get_one_question()
+            return _run_agent_on_questions([question], profile.username)
         except Exception as e:
+            logger.error(f"Error getting one question: {e}")
+            return f"Error getting question: {e}", EMPTY_RESULTS_TABLE
+    else:
+        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
+def run_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
+    """Runs the agent on all questions."""
+    if profile:
+        try:
+            questions = evaluator.get_questions()
+            return _run_agent_on_questions(questions, profile.username)
+        except Exception as e:
+            logger.error(f"Error getting all questions: {e}")
+            return f"Error getting questions: {e}", EMPTY_RESULTS_TABLE
+    else:
+        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
+def submit(profile: gr.OAuthProfile | None) -> str:
+    """Submits cached answers for evaluation."""
+    if profile:
+        return evaluator.submit_answers(profile.username)
+    else:
+        return LOGIN_MESSAGE
+# --- Build Gradio Interface using Blocks ---
+with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        **Instructions:**
+        1.  Log in to your Hugging Face account using the button below.
+        2.  Click 'Get One Answer' to run the agent on a random question or 'Get All Answers' to run all.
+        3.  Click 'Submit Answers' to submit answers for evaluation. **Your HF username will be submitted for leaderboard tracking.**
+        ---
+        **Disclaimers:**
+        * Running 'Get All Answers' can take significant time as the agent processes all 20 questions.
+        * Agent logs are detailed (DEBUG level) and may appear interleaved due to parallel execution.
+        * The 'Submit Answers' button uses the most recent agent answers cached locally for your username.
+        * **API Keys Required:** Ensure `GEMINI_API_KEY` is set as a Space Secret (or environment variable if running locally).
+        """
+    )
+    gr.LoginButton()
+    run_one_button = gr.Button("Get One Answer")
+    run_all_button = gr.Button("Get All Answers")
+    submit_button = gr.Button("Submit Answers")
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False)
+    results_table = gr.DataFrame(
+        label="Questions and Agent Answers", wrap=True)
+    run_one_button.click(
+        fn=run_one, outputs=[status_output, results_table]
+    )
+    run_all_button.click(
+        fn=run_all, outputs=[status_output, results_table]
+    )
+    submit_button.click(
+        fn=submit, outputs=[status_output]
+    )
+if __name__ == "__main__":
+    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)
+    # Check for SPACE_HOST and SPACE_ID at startup for information
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")
+    if space_host_startup:
+        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
+        logger.info(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+    else:
+        logger.info("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup:
+        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
+        logger.info(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        logger.info(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else:
+        logger.info("ℹ️  SPACE_ID environment variable not found. Repo URL cannot be determined.")
+    logger.info("-"*(60 + len(" App Starting ")) + "\n")
+    logger.info("Launching Gradio Interface for GAIA Agent Evaluation...")
+    demo.launch(debug=True, share=False)