Agent_Final_Assignment

Runtime error

App Files Community

hassenhamdi committed on Jun 30

Commit

4ca33ca

verified ·

1 Parent(s): a959e70

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -286

app.py CHANGED Viewed

@@ -1,326 +1,149 @@
 import os
-import gradio as gr
-import requests
 import pandas as pd
-from smolagents import OpenAIServerModel
-from smolagents import CodeAgent, Tool, tool
-from smolagents import DuckDuckGoSearchTool, VisitWebpageTool
-from smolagents import PythonInterpreterTool
 import time
-from requests.exceptions import HTTPError
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Tool Definitions ---
-class GaiaFileTool(Tool):
-    """
-    A smolagents.Tool subclass for downloading files from the GAIA API.
-    """
-    name = "download_gaia_file"
-    description = "Downloads a file associated with a given GAIA task ID and returns its content. It takes 'task_id' as input and returns the file content as a string. Use this when a question refers to an external file."
-    inputs = {"task_id": {"type": "string", "description": "The task ID for which to download the file (e.g., '2345')."}}
-    output_type = "string"
-    def __init__(self, api_base_url=DEFAULT_API_URL):
-        super().__init__()
-        self.api_base_url = api_base_url
-        print(f"GaiaFileTool initialized with API base URL: {self.api_base_url}")
-    def forward(self, task_id: str) -> str:
-        """
-        The core logic for the tool: downloads a file from the GAIA API.
-        This method is called by the agent when it uses this tool.
-        """
-        file_url = f"{self.api_base_url}/files/{task_id}"
-        print(f"Attempting to download file from: {file_url}")
-        try:
-            response = requests.get(file_url)
-            response.raise_for_status()
-            print(f"Successfully downloaded file for task_id {task_id}")
-            return response.text
-        except requests.exceptions.RequestException as e:
-            print(f"Error downloading file for task_id {task_id}: {e}")
-            return f"Error downloading file: {e}"
-# --- Custom GAIA Agent Definition ---
-class GaiaAgent(CodeAgent):
-    """
-    A smolagents-based agent designed to tackle GAIA Level 1 benchmark questions.
-    It uses Gemini Flash for reasoning and integrates a Python Interpreter, a
-    GAIA file download tool, and web browsing/searching tools.
-    """
-    def __init__(self):
-        print("GaiaAgent initializing...")
-        gemini_api_key = os.getenv("GEMINI_API_KEY")
-        if not gemini_api_key:
-            print("WARNING: GEMINI_API_KEY environment variable not set.")
-            print("Please set GEMINI_API_KEY for Gemini Flash to work.")
-        self.llm_model = OpenAIServerModel(
-            model_id="gemini-2.0-flash",
-            api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
-            api_key=gemini_api_key,
-            temperature=0.1,
-        )
-        # Initialize GAIA file tool
-        gaia_file_tool_instance = GaiaFileTool()
-        # Initialize web searching and browsing tools
-        duckduckgo_search_tool = DuckDuckGoSearchTool()
-        visit_webpage_tool = VisitWebpageTool()
-        # Initialize the built-in Python Interpreter Tool
-        python_interpreter_tool = PythonInterpreterTool()
-        # Define the tools available to the agent
-        agent_tools = [
-            python_interpreter_tool,
-            gaia_file_tool_instance,
-            duckduckgo_search_tool,
-            visit_webpage_tool
-        ]
-        # Set verbosity_level directly to 2 for DEBUG logs
-        super().__init__(model=self.llm_model, tools=agent_tools, verbosity_level=2)
-        print("GaiaAgent initialized successfully with Gemini Flash and built-in tools.")
-    def __call__(self, question: str) -> str:
-        """
-        The main method for the agent to process a question and return an answer.
-        This will involve the agent's internal reasoning, tool use, and planning.
-        Includes retry logic for LLM calls to handle rate limits.
-        """
-        print(f"\n--- Agent received question (first 100 chars): {question[:100]}...")
-        prompt = (
-            f"You are an AI agent designed to solve GAIA benchmark questions. "
-            f"Your goal is to provide the exact answer as a string, without any additional text, "
-            f"explanation, or the phrase 'FINAL ANSWER:'. "
-            f"Break down the problem, use the available tools (python_interpreter, download_gaia_file, "
-            f"duckduckgo_search_tool, visit_webpage_tool) as needed, and think step-by-step. "
-            f"When using web search or webpage visit tools, be highly efficient. "
-            f"Formulate comprehensive search queries to get as much relevant information as possible in one go. "
-            f"Only visit a webpage if absolutely necessary and when you expect it to contain the direct answer or crucial data. "
-            f"Avoid redundant searches or visiting multiple pages for the same piece of information. "
-            f"Use 'python_interpreter' for any calculations or code execution. "
-            f"Use 'duckduckgo_search_tool' to find information on the web. "
-            f"Use 'visit_webpage_tool' to read the content of a specific URL. "
-            f"When you have the final answer, output ONLY the answer string.\n\n"
-            f"Question: {question}"
-        )
-        print(f"Agent running with prompt (first 200 chars): {prompt[:200]}...")
-        max_retries = 5
-        initial_retry_delay = 30
-        retry_delay = initial_retry_delay
-        result = None
-        for attempt in range(max_retries):
-            try:
-                result = self.run(prompt)
-                print(f"Agent raw output from self.run():\n{result}")
-                break # Break loop if successful
-            except HTTPError as e:
-                if e.response.status_code == 429:
-                    error_details = ""
-                    try:
-                        error_json = e.response.json()
-                        if 'error' in error_json and 'details' in error_json['error']:
-                            for detail in error_json['error']['details']:
-                                if detail.get('@type') == 'type.googleapis.com/google.rpc.QuotaFailure':
-                                    quota_metric = detail.get('quotaMetric', 'N/A')
-                                    quota_id = detail.get('quotaId', 'N/A')
-                                    quota_value = detail.get('quotaValue', 'N/A')
-                                    error_details = f"Quota Metric: {quota_metric}, Quota ID: {quota_id}, Value: {quota_value}. "
-                                    break
-                    except Exception as parse_error:
-                        print(f"Could not parse detailed error from 429 response: {parse_error}")
-                        error_details = "Check Google Cloud Console for details. "
-                    error_message = (
-                        f"Gemini API Rate limit hit (429) on attempt {attempt + 1}/{max_retries}. "
-                        f"{error_details}"
-                        f"Retrying in {retry_delay} seconds... "
-                        f"This could be due to the 15 RPM or 200 RPD free tier limits. "
-                        f"If this persists, your daily quota might be exhausted."
-                    )
-                    print(error_message)
-                    time.sleep(retry_delay)
-                    retry_delay *= 2
-                else:
-                    raise
-            except Exception as e:
-                import traceback
-                print(f"--- Error during agent execution on attempt {attempt + 1}/{max_retries}: {e}")
-                traceback.print_exc()
-                if attempt < max_retries - 1:
-                    print(f"Retrying in {retry_delay} seconds...")
-                    time.sleep(retry_delay)
-                    retry_delay *= 2
-                else:
-                    return "Agent encountered an error and could not provide an answer after multiple retries."
-        if result is None:
-            return "Agent failed after multiple retries due to an unknown error or persistent rate limits."
-        final_answer = self._extract_exact_answer(result)
-        print(f"--- Agent returning final answer (first 100 chars): {final_answer[:100]}...")
-        return final_answer
-    def _extract_exact_answer(self, raw_output: str) -> str:
-        """
-        Extracts and formats the exact answer from the agent's raw output.
-        Ensures no "FINAL ANSWER" text is included and handles any
-        extraneous formatting. This function is crucial for GAIA's exact match scoring.
-        """
-        print(f"Attempting to extract exact answer from raw output (first 200 chars):\n{raw_output[:200]}...")
-        cleaned_output = raw_output.replace("FINAL ANSWER:", "").strip()
-        cleaned_output = cleaned_output.replace("Answer:", "").strip()
-        cleaned_output = cleaned_output.replace("The answer is:", "").strip()
-        cleaned_output = cleaned_output.replace("```python", "").replace("```", "").strip()
-        lines = cleaned_output.split('\n')
-        if lines:
-            potential_answer = lines[-1].strip()
-            if len(potential_answer) < 5 or "tool_code" in potential_answer.lower():
-                for line in reversed(lines[:-1]):
-                    if line.strip() and "tool_code" not in line.lower():
-                        potential_answer = line.strip()
-                        break
-            cleaned_output = potential_answer
-        if cleaned_output.startswith('"') and cleaned_output.endswith('"'):
-            cleaned_output = cleaned_output[1:-1]
-        if cleaned_output.startswith("'") and cleaned_output.endswith("'"):
-            cleaned_output = cleaned_output[1:-1]
-        print(f"Extracted and cleaned answer: {cleaned_output[:100]}...")
-        return cleaned_output.strip()
-# --- Gradio Application Logic ---
-def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the GaiaAgent on them, submits all answers,
-    and displays the results.
     """
-    space_id = os.getenv("SPACE_ID")
-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
     else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
-    try:
-        agent = GaiaAgent()
-    except Exception as e:
-        print(f"Error during agent initialization in run_and_submit_all: {e}")
-        import traceback
-        traceback.print_exc()
-        return f"Error initializing agent: {e}", None
-    try:
-        print(f"Fetching questions from: {questions_url}")
-        questions_response = requests.get(questions_url)
-        questions_response.raise_for_status()
-        questions = questions_response.json()
-        print(f"Fetched {len(questions)} questions.")
-    except requests.exceptions.RequestException as e:
-        print(f"Error fetching questions: {e}")
-        return f"Error fetching questions: {e}", None
-    all_answers = []
-    results_data = []
-    for i, q_data in enumerate(questions):
-        task_id = q_data.get("task_id", f"unknown_{i}")
-        question_text = q_data.get("question", "No question text found.")
-        print(f"\n--- Processing Task ID: {task_id} ---")
-        print(f"Question: {question_text[:100]}...")
-        agent_answer = agent(question_text)
-        all_answers.append({"task_id": task_id, "answer": agent_answer})
-        results_data.append({
-            "Task ID": task_id,
-            "Question": question_text,
-            "Agent Answer": agent_answer
-        })
-        print(f"--- Finished processing Task ID: {task_id} ---")
-    try:
-        print(f"\nSubmitting {len(all_answers)} answers to: {submit_url}")
-        submission_payload = {
-            "username": username,
-            "code_link": f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_execution",
-            "answers": all_answers
-        }
-        submit_response = requests.post(submit_url, json=submission_payload)
-        submit_response.raise_for_status()
-        submission_result = submit_response.json()
-        print(f"Submission successful: {submission_result}")
-        status_message = f"Submission successful!\nScore: {submission_result.get('score', 'N/A')}\nDetails: {submission_result.get('message', 'No message')}"
-    except requests.exceptions.RequestException as e:
-        print(f"Error submitting answers: {e}")
-        status_message = f"Error submitting answers: {e}"
-    results_df = pd.DataFrame(results_data)
-    return status_message, results_df
-# --- Gradio UI ---
 with gr.Blocks() as demo:
     gr.Markdown(
         """
-        # GAIA Level 1 Agent Evaluation
-        This application allows you to run your `smolagents`-based agent on the GAIA Level 1 benchmark
-        and submit your answers to the leaderboard.
-        **Important:**
-        1. **Login to Hugging Face** using the button below to submit your score.
-        2. **Set `GEMINI_API_KEY`**: Ensure your `GEMINI_API_KEY` is set as a Space Secret
-           in Hugging Face Spaces (or as an environment variable if running locally)
-           for the Gemini Flash model to function.
         """
     )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID")
     if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
     else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
     if space_id_startup:
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
-        print("ℹ️  SPACE_ID environment variable not found. Code link might be incorrect for submission.")
-    demo.launch()

 import os
 import pandas as pd
+import gradio as gr
+import logging
 import time
+# Import the new Settings, Evaluator, and Runner classes
+from settings import Settings
+from evaluator import Evaluator
+from runner import Runner
+# Configure logging
+logging.basicConfig(level=logging.INFO, force=True)
+logger = logging.getLogger(__name__)
+# Initialize settings, evaluator, and runner
+settings = Settings()
+evaluator = Evaluator(settings)
+runner = Runner(settings)
+LOGIN_MESSAGE = "Please Login to Hugging Face with the button."
+EMPTY_RESULTS_TABLE = pd.DataFrame(columns=['task_id', 'question', 'answer'])
+def _format_elapsed_time(elapsed_time):
+    """Formats elapsed time into minutes and seconds."""
+    minutes = int(elapsed_time // 60)
+    seconds = elapsed_time % 60
+    if minutes > 0:
+        return f"Elapsed time: {minutes} minutes {seconds:.2f} seconds"
+    else:
+        return f"Elapsed time: {seconds:.2f} seconds"
+def _run_agent_on_questions(questions_list: list, username: str) -> tuple[str, pd.DataFrame]:
     """
+    Helper function to run the agent on a list of questions and return status and results.
     """
+    start_time = time.time()
+    logger.info(f"Starting agent run for user: {username} on {len(questions_list)} questions.")
+    # The runner handles the agent execution and saving of answers
+    question_answer_pairs_df = runner.run_agent(questions_list, username)
+    end_time = time.time()
+    elapsed_time_str = _format_elapsed_time(end_time - start_time)
+    message = f"Agent run complete. {elapsed_time_str}"
+    logger.info(message)
+    return message, question_answer_pairs_df
+def run_one(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
+    """Runs the agent on one random question."""
+    if profile:
+        try:
+            question = evaluator.get_one_question()
+            return _run_agent_on_questions([question], profile.username)
+        except Exception as e:
+            logger.error(f"Error getting one question: {e}")
+            return f"Error getting question: {e}", EMPTY_RESULTS_TABLE
     else:
+        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
+def run_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
+    """Runs the agent on all questions."""
+    if profile:
+        try:
+            questions = evaluator.get_questions()
+            return _run_agent_on_questions(questions, profile.username)
+        except Exception as e:
+            logger.error(f"Error getting all questions: {e}")
+            return f"Error getting questions: {e}", EMPTY_RESULTS_TABLE
+    else:
+        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
+def submit(profile: gr.OAuthProfile | None) -> str:
+    """Submits cached answers for evaluation."""
+    if profile:
+        return evaluator.submit_answers(profile.username)
+    else:
+        return LOGIN_MESSAGE
+# --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
+        **Instructions:**
+        1.  Log in to your Hugging Face account using the button below.
+        2.  Click 'Get One Answer' to run the agent on a random question or 'Get All Answers' to run all.
+        3.  Click 'Submit Answers' to submit answers for evaluation. **Your HF username will be submitted for leaderboard tracking.**
+        ---
+        **Disclaimers:**
+        * Running 'Get All Answers' can take significant time as the agent processes all 20 questions.
+        * Agent logs are detailed (DEBUG level) and may appear interleaved due to parallel execution.
+        * The 'Submit Answers' button uses the most recent agent answers cached locally for your username.
+        * **API Keys Required:** Ensure `GEMINI_API_KEY` is set as a Space Secret (or environment variable if running locally).
         """
     )
     gr.LoginButton()
+    run_one_button = gr.Button("Get One Answer")
+    run_all_button = gr.Button("Get All Answers")
+    submit_button = gr.Button("Submit Answers")
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False)
+    results_table = gr.DataFrame(
+        label="Questions and Agent Answers", wrap=True)
+    run_one_button.click(
+        fn=run_one, outputs=[status_output, results_table]
+    )
+    run_all_button.click(
+        fn=run_all, outputs=[status_output, results_table]
+    )
+    submit_button.click(
+        fn=submit, outputs=[status_output]
     )
 if __name__ == "__main__":
+    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)
+    # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID")
     if space_host_startup:
+        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
+        logger.info(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
     else:
+        logger.info("ℹ️  SPACE_HOST environment variable not found (running locally?).")
     if space_id_startup:
+        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
+        logger.info(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        logger.info(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
+        logger.info("ℹ️  SPACE_ID environment variable not found. Repo URL cannot be determined.")
+    logger.info("-"*(60 + len(" App Starting ")) + "\n")
+    logger.info("Launching Gradio Interface for GAIA Agent Evaluation...")
+    demo.launch(debug=True, share=False)