# GaiaAgent
#
# Sleeping
#
# File size: 7,454 Bytes

import os
import subprocess
import sys
import traceback  # Import traceback for detailed error logging

import gradio as gr
import pandas as pd
import requests
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    OpenAIServerModel,
)

class PythonREPLTool:
    """Execute a Python snippet in a separate interpreter process.

    The snippet is handed to the interpreter with ``-c``. Its standard output
    is returned on success and its standard error (prefixed with ``Error:``)
    on a non-zero exit status.

    WARNING: the snippet runs unsandboxed with the privileges of this process.
    Only feed it code you are prepared to execute.
    """

    # NOTE(review): smolagents agents are normally given `Tool` subclasses;
    # confirm this plain class is accepted by `CodeAgent(tools=...)` in the
    # installed version.
    name = "python_repl"
    description = "Runs Python code and returns the output or error."

    def __init__(self, timeout=10):
        """Store the per-run time limit.

        Args:
            timeout: Seconds a snippet may run before it is aborted.
        """
        self.timeout = timeout

    def run(self, code: str) -> str:
        """Run ``code`` in a child interpreter and return its textual result.

        Args:
            code: Python source passed to the interpreter via ``-c``.

        Returns:
            The stripped standard output when the child exits with status 0,
            ``"Error:\\n<stderr>"`` when it exits with any other status, or
            ``"Execution timed out."`` when it exceeds ``self.timeout``.
        """
        # Prefer the interpreter running this process so the snippet sees the
        # same environment; fall back to "python3" if it is unknown.
        interpreter = sys.executable or "python3"
        try:
            result = subprocess.run(
                [interpreter, "-c", code],
                # Without capturing in text mode, stdout/stderr are None and
                # the .strip() calls below would raise AttributeError.
                capture_output=True,
                text=True,
                timeout=self.timeout,
            )
        except subprocess.TimeoutExpired:
            return "Execution timed out."

        if result.returncode == 0:
            return result.stdout.strip()
        return f"Error:\n{result.stderr.strip()}"

# --- Constants ---
# Base URL of the scoring service; the "/questions" and "/submit" endpoint
# paths are appended to it when fetching tasks and posting answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


# --- Agent Definition ---
class GaiaAgent:
    """Question-answering agent built from an OpenAI model, web search and a Python runner.

    Instances are callable: ``agent(question)`` returns the agent's answer, or
    a fixed error string if the underlying run raises.
    """

    def __init__(self, openai_key: str):
        """Build the model, the tools and the ``CodeAgent`` that drives them.

        Args:
            openai_key: API key forwarded to ``OpenAIServerModel``.
        """
        self.openai_key = openai_key
        # 1) Initialize the LLM-backed model
        # NOTE(review): `system_prompt` is passed through as an extra keyword
        # argument; confirm the installed smolagents `OpenAIServerModel`
        # accepts it rather than forwarding it to the completion call.
        self.model = OpenAIServerModel(
            model_id="gpt-4",  # or "gpt-3.5-turbo" if you prefer
            api_key=self.openai_key,
            system_prompt=(
                "You are a meticulous AI agent. "
                "Always think in Python code using the available tools.  "
                "Never answer without executing or checking with a tool.  "
                "Use DuckDuckGoSearchTool for factual lookups. "
                "Use PythonREPLTool for calculations, string manipulation, and logical deductions. "
                "Respond with the final answer only. Do not include any extra explanation. "
                # Trailing newline keeps the first example from being glued
                # onto the end of this sentence.
                "Here are some examples of how to use the tools:\n"
                "# Example 1: Calculate the square root of 16\n"
                "# ```python\n"
                "# print(16**0.5)\n"
                "# ```\n"
                "# Example 2: Search for the capital of France\n"
                "# ```python\n"
                "# print(DuckDuckGoSearchTool(query='capital of France'))\n"
                "# ```\n"
                "# Example 3: Reverse a string\n"
                "# ```python\n"
                "# print('hello'[::-1])\n"
                "# ```\n"
            ),
        )
        # 2) Define the tools
        self.search_tool = DuckDuckGoSearchTool()
        self.python_tool = PythonREPLTool(timeout=10)
        # 3) Create the CodeAgent
        # No trailing comma after the call: a stray one would turn
        # `self.agent` into a 1-tuple and break `self.agent.run(...)`.
        self.agent = CodeAgent(
            model=self.model,
            tools=[self.search_tool, self.python_tool],
            # Upper bound on the number of reasoning/tool steps per question
            max_steps=20,
        )

    def __call__(self, question: str) -> str:
        """Answer ``question`` with the wrapped agent.

        Args:
            question: The question text handed to ``self.agent.run``.

        Returns:
            Whatever ``self.agent.run`` returns, or the string
            ``"ERROR: Agent failed to answer."`` if it raises. The failure is
            printed with its traceback and never propagated to the caller.
        """
        try:
            return self.agent.run(question)
        except Exception as e:
            error_message = f"Agent execution failed: {e}\n{traceback.format_exc()}"
            print(error_message)  # Log the error for debugging
            return "ERROR: Agent failed to answer."  # Return a string, not an exception


def run_and_submit_all(openai_key: str):
    """Run the agent on every fetched question and submit the answers for scoring.

    Args:
        openai_key: OpenAI API key used to construct the agent.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message.
        ``table`` is a DataFrame with "Task ID", "Question" and "Answer"
        columns, or ``None`` when the run stopped before any question was
        attempted (agent construction or question download failed).
    """
    # Login is not consulted here, so every submission uses this fixed name.
    username = "anonymous"

    # 1) Instantiate the agent
    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        error_message = f"Error initializing agent: {e}\n{traceback.format_exc()}"
        print(error_message)
        return f"Error initializing agent: {e}", None

    # 2) Fetch the GAIA questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        error_message = f"Error fetching questions: {e}\n{traceback.format_exc()}"
        print(error_message)
        return f"Error fetching questions: {e}", None

    # The loop below expects a non-empty list of question records.
    if not isinstance(questions, list) or not questions:
        return "Fetched questions list is empty or invalid format.", None

    # 3) Run the agent on each question
    answers = []
    log = []
    for item in questions:
        try:
            tid = item["task_id"]
            q = item["question"]
        except (KeyError, TypeError):
            # One malformed record should not abort the remaining questions.
            print(f"Skipping malformed question item: {item!r}")
            continue
        try:
            ans = agent(q)
        except Exception as e:
            error_message = f"Error processing question {tid}: {e}\n{traceback.format_exc()}"
            print(error_message)  # Print full traceback
            ans = f"ERROR: {e}"
        answers.append({"task_id": tid, "submitted_answer": ans})
        log.append({"Task ID": tid, "Question": q, "Answer": ans})

    if not answers:
        return "Agent did not produce any answers to submit.", pd.DataFrame(log)

    # 4) Submit
    submit_url = f"{DEFAULT_API_URL}/submit"
    payload = {
        "username": username,
        "agent_code": "https://huggingface.co/spaces/kshitijthakkar/GaiaAgent/tree/main",
        "answers": answers,
    }
    try:
        res = requests.post(submit_url, json=payload, timeout=60)
        res.raise_for_status()
        data = res.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {data['username']}\n"
            f"Score: {data['score']}% ({data['correct_count']}/{data['total_attempted']})\n"
            f"Message: {data.get('message', '')}"
        )
    except Exception as e:
        error_message = f"Submission failed: {e}\n{traceback.format_exc()}"
        print(error_message)
        status = f"Submission failed: {e}"
    return status, pd.DataFrame(log)



# --- Gradio UI ---
def _test_result_frame(rows):
    """Build the results table with a fixed "Question"/"Answer" column layout."""
    return pd.DataFrame(rows, columns=["Question", "Answer"])


def run_test_questions(profile, openai_key, test_questions):
    """Run the agent on ad-hoc test questions and tabulate the answers.

    Args:
        profile: Login indicator; any falsy value is treated as "not logged in".
        openai_key: OpenAI API key used to construct the agent.
        test_questions: Either one comma-separated string (the form a text box
            delivers) or an iterable of question strings. Because commas are
            the separator, a question containing a comma is split when given
            in the string form.

    Returns:
        A DataFrame with "Question" and "Answer" columns. Every path returns
        this single value; failures that prevent any question from running
        are reported as one row whose "Answer" holds the message.
    """
    if not profile:
        return _test_result_frame(
            [{"Question": "", "Answer": "Please log in to Hugging Face to run the test questions."}]
        )

    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        error_message = f"Error initializing agent: {e}\n{traceback.format_exc()}"
        print(error_message)
        return _test_result_frame(
            [{"Question": "", "Answer": f"Error initializing agent: {e}"}]
        )

    # Iterating a raw string would yield single characters, so split it first.
    if isinstance(test_questions, str):
        questions = [part.strip() for part in test_questions.split(",") if part.strip()]
    else:
        questions = list(test_questions or [])

    log = []
    for q in questions:
        try:
            ans = agent(q)
        except Exception as e:
            error_message = f"Error processing test question: {e}\n{traceback.format_exc()}"
            print(error_message)
            ans = f"ERROR: {e}"
        log.append({"Question": q, "Answer": ans})
    return _test_result_frame(log)



# Build the page. Components appear in the order they are created here, so
# the statement order below is also the on-screen order.
with gr.Blocks() as demo: # Corrected to use gr.Blocks()
    gr.Markdown("# GAIA Benchmark Runner")
    gr.Markdown(
        "1. Clone this Space and customize your agent logic.\n"
        "2. Log in below (to get your HF username).\n"
        "3. Enter your OpenAI key (if needed).\n"
        "4. Click to run and submit to the leaderboard."
    )
    login = gr.LoginButton()
    # type="password" masks the key while it is typed.
    key_in = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
    run_btn = gr.Button("Run & Submit")
    out_status = gr.Textbox(label="Status", lines=4)
    out_table = gr.DataFrame(label="Questions & Answers")

    # Ad-hoc test section: free-form questions typed as one comma-separated line.
    test_questions_input = gr.Textbox(
        label="Test Questions (comma-separated)",
        placeholder="What is the capital of France?, What is the square root of 25?",
    )
    run_test_btn = gr.Button("Run Test Questions")
    test_results_output = gr.DataFrame(label="Test Results")

    # Full benchmark run: only the API key is passed; the handler's two return
    # values fill the status box and the results table.
    run_btn.click(fn=run_and_submit_all, inputs=[key_in], outputs=[out_status, out_table])
    # NOTE(review): `login` is passed as an ordinary input, so the handler's
    # `profile` argument receives whatever value the LoginButton component
    # yields. Confirm that this reflects login state; Gradio documents
    # injecting the profile through a `gr.OAuthProfile` type hint instead.
    # The questions text box delivers its content as a single string, and the
    # handler has exactly one output component to fill.
    run_test_btn.click(
        fn=run_test_questions,
        inputs=[login, key_in, test_questions_input],
        outputs=[test_results_output],
    )

# Launch the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)