"""Gradio app that runs a smolagents CodeAgent on the GAIA course questions.

The app fetches the question set from the scoring API, answers each question
with ``GaiaAgent``, submits the answers, and shows the results in a table.
A second panel lets the user try the agent on ad-hoc test questions.
"""

import os
import subprocess
import sys
import traceback  # Import traceback for detailed error logging

import gradio as gr
import pandas as pd
import requests
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    OpenAIServerModel,
    Tool,
)


class PythonREPLTool(Tool):
    """Tool that executes a Python snippet in a separate interpreter process.

    SECURITY NOTE: the snippet is executed as-is with the privileges of this
    process. The code comes from the LLM, so treat it as untrusted input and
    run the app in a sandboxed environment.
    """

    name = "python_repl"
    description = "Runs Python code and returns the output or error."
    # Tool metadata in the shape the smolagents Tool documentation describes.
    inputs = {
        "code": {
            "type": "string",
            "description": "The Python source code to execute.",
        }
    }
    output_type = "string"

    def __init__(self, timeout=10):
        """Create the tool.

        Args:
            timeout: Maximum number of seconds a snippet may run.
        """
        super().__init__()
        self.timeout = timeout

    def forward(self, code: str) -> str:
        """Entry point used by the agent framework; delegates to ``run``."""
        return self.run(code)

    def run(self, code: str) -> str:
        """Execute ``code`` and return its stdout, or a readable error string.

        Args:
            code: Python source passed to the interpreter via ``-c``.

        Returns:
            The stripped stdout on success, ``"Error:\\n<stderr>"`` when the
            interpreter exits with a non-zero status, or
            ``"Execution timed out."`` when the timeout is exceeded.
        """
        try:
            result = subprocess.run(
                # sys.executable keeps the snippet on the interpreter (and
                # therefore the installed packages) this app is running on.
                [sys.executable, "-c", code],
                # Without capture_output/text, stdout and stderr are None and
                # the .strip() calls below raise AttributeError.
                capture_output=True,
                text=True,
                timeout=self.timeout,
            )
        except subprocess.TimeoutExpired:
            return "Execution timed out."

        if result.returncode == 0:
            return result.stdout.strip()
        return f"Error:\n{result.stderr.strip()}"


# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Extra instructions appended to the agent's system prompt.
# NOTE(review): the examples call `DuckDuckGoSearchTool(...)`; inside the agent
# the tools are presumably exposed under their `name` attributes instead --
# confirm and adjust the examples if so.
AGENT_INSTRUCTIONS = (
    "You are a meticulous AI agent. "
    "Always think in Python code using the available tools. "
    "Never answer without executing or checking with a tool. "
    "Use DuckDuckGoSearchTool for factual lookups. "
    "Use PythonREPLTool for calculations, string manipulation, and logical deductions. "
    "Respond with the final answer only. Do not include any extra explanation. "
    "Here are some examples of how to use the tools:"
    "# Example 1: Calculate the square root of 16\n"
    "# ```python\n"
    "# print(16**0.5)\n"
    "# ```\n"
    "# Example 2: Search for the capital of France\n"
    "# ```python\n"
    "# print(DuckDuckGoSearchTool(query='capital of France'))\n"
    "# ```\n"
    "# Example 3: Reverse a string\n"
    "# ```python\n"
    "# print('hello'[::-1])\n"
    "# ```\n"
)


def _report_error(context: str, exc: Exception) -> str:
    """Print ``context``, the exception and its traceback; return a short message.

    Must be called from inside an ``except`` block so the traceback is available.
    """
    print(f"{context}: {exc}\n{traceback.format_exc()}")
    return f"{context}: {exc}"


# --- Agent Definition ---
class GaiaAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` with search and Python tools."""

    def __init__(self, openai_key: str):
        """Build the model, the tools and the agent.

        Args:
            openai_key: OpenAI API key. When empty, the ``OPENAI_API_KEY``
                environment variable is used instead.
        """
        # An empty textbox yields "", which would otherwise be sent as the key.
        self.openai_key = openai_key or os.environ.get("OPENAI_API_KEY")

        # 1) Initialize the LLM-backed model
        self.model = OpenAIServerModel(
            model_id="gpt-4",  # or "gpt-3.5-turbo" if you prefer
            api_key=self.openai_key,
        )

        # 2) Define the tools
        self.search_tool = DuckDuckGoSearchTool()
        self.python_tool = PythonREPLTool(timeout=10)  # Initialize PythonREPLTool

        # 3) Create the CodeAgent
        # (No trailing comma after the call: that would store a 1-tuple and
        # make `self.agent.run` fail.)
        self.agent = CodeAgent(
            model=self.model,
            tools=[self.search_tool, self.python_tool],
            max_steps=20,
        )

        # 4) Attach the custom instructions to the agent's system prompt.
        # The instructions are not passed to OpenAIServerModel: extra model
        # kwargs are presumably forwarded to the completion request, which has
        # no `system_prompt` parameter -- TODO confirm against the installed
        # smolagents version.
        self.agent.prompt_templates["system_prompt"] += "\n\n" + AGENT_INSTRUCTIONS

    def __call__(self, question: str) -> str:
        """Answer ``question``; return a fixed error string if the agent fails."""
        try:
            return self.agent.run(question)
        except Exception as e:
            error_message = f"Agent execution failed: {e}\n{traceback.format_exc()}"
            print(error_message)  # Log the error for debugging
            return "ERROR: Agent failed to answer."  # Return a string, not an exception


def run_and_submit_all(openai_key: str):
    """Fetch all questions, answer them with the agent and submit the answers.

    Args:
        openai_key: OpenAI API key forwarded to ``GaiaAgent``.

    Returns:
        A ``(status, table)`` pair: ``status`` is a human-readable message and
        ``table`` is a ``pandas.DataFrame`` of questions and answers, or
        ``None`` when the run stopped before any question was answered.
    """
    # --- Login & Setup ---
    # Login is currently not enforced here; submissions use a fixed username.
    username = "anonymous"

    # 1) Instantiate our improved agent
    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        return _report_error("Error initializing agent", e), None

    # 2) Fetch the GAIA questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return _report_error("Error fetching questions", e), None

    if not questions or not isinstance(questions, list):
        print("Fetched questions list is empty or has an unexpected format.")
        return "Fetched questions list is empty or invalid format.", None

    # 3) Run the agent on each question
    answers = []
    log = []
    for item in questions:
        tid = item.get("task_id") if isinstance(item, dict) else None
        q = item.get("question") if isinstance(item, dict) else None
        if not tid or q is None:
            # Skip malformed entries instead of aborting the whole run.
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            ans = agent(q)
        except Exception as e:
            _report_error(f"Error processing question {tid}", e)  # Print full traceback
            ans = f"ERROR: {e}"
        answers.append({"task_id": tid, "submitted_answer": ans})
        log.append({"Task ID": tid, "Question": q, "Answer": ans})

    if not answers:
        return "Agent did not produce any answers to submit.", pd.DataFrame(log)

    # 4) Submit
    submit_url = f"{DEFAULT_API_URL}/submit"
    payload = {
        "username": username,
        "agent_code": "https://huggingface.co/spaces/kshitijthakkar/GaiaAgent/tree/main",
        "answers": answers,
    }
    try:
        res = requests.post(submit_url, json=payload, timeout=60)
        res.raise_for_status()
        data = res.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {data['username']}\n"
            f"Score: {data['score']}% ({data['correct_count']}/{data['total_attempted']})\n"
            f"Message: {data.get('message', '')}"
        )
    except Exception as e:
        status = _report_error("Submission failed", e)

    return status, pd.DataFrame(log)


def _split_questions(test_questions):
    """Turn the textbox value into a list of non-empty, stripped questions.

    Accepts either a comma-separated string (what the UI sends) or an
    iterable of strings. Questions that themselves contain commas cannot be
    expressed in the comma-separated form.
    """
    if not test_questions:
        return []
    if isinstance(test_questions, str):
        test_questions = test_questions.split(",")
    return [q.strip() for q in test_questions if q and q.strip()]


def _message_table(message: str) -> pd.DataFrame:
    """Wrap ``message`` in a one-row table so it fits the single table output."""
    return pd.DataFrame([{"Question": "", "Answer": message}])


# --- Gradio UI ---
def run_test_questions(profile: gr.OAuthProfile | None, openai_key, test_questions):
    """Run the agent on user-supplied test questions.

    Args:
        profile: Hugging Face OAuth profile; falsy when the user is not
            logged in. Supplied by Gradio through the type annotation rather
            than through the ``inputs`` list.
        openai_key: OpenAI API key forwarded to ``GaiaAgent``.
        test_questions: Comma-separated questions (or an iterable of them).

    Returns:
        A ``pandas.DataFrame`` with ``Question`` and ``Answer`` columns. Error
        conditions are reported as a single row, because the click handler is
        wired to exactly one output component.
    """
    if not profile:
        return _message_table("Please log in to Hugging Face to run the test questions.")

    questions = _split_questions(test_questions)
    if not questions:
        return _message_table("Please enter at least one test question.")

    try:
        agent = GaiaAgent(openai_key)
    except Exception as e:
        return _message_table(_report_error("Error initializing agent", e))

    log = []
    for q in questions:
        try:
            ans = agent(q)
        except Exception as e:
            _report_error("Error processing test question", e)
            ans = f"ERROR: {e}"
        log.append({"Question": q, "Answer": ans})
    return pd.DataFrame(log)


with gr.Blocks() as demo:  # Corrected to use gr.Blocks()
    gr.Markdown("# GAIA Benchmark Runner")
    gr.Markdown(
        "1. Clone this Space and customize your agent logic.\n"
        "2. Log in below (to get your HF username).\n"
        "3. Enter your OpenAI key (if needed).\n"
        "4. Click to run and submit to the leaderboard."
    )
    login = gr.LoginButton()
    key_in = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
    run_btn = gr.Button("Run & Submit")
    out_status = gr.Textbox(label="Status", lines=4)
    out_table = gr.DataFrame(label="Questions & Answers")

    test_questions_input = gr.Textbox(
        label="Test Questions (comma-separated)",
        placeholder="What is the capital of France?, What is the square root of 25?",
    )
    run_test_btn = gr.Button("Run Test Questions")
    test_results_output = gr.DataFrame(label="Test Results")

    run_btn.click(fn=run_and_submit_all, inputs=[key_in], outputs=[out_status, out_table])
    run_test_btn.click(
        fn=run_test_questions,
        # `profile` is not listed here: passing the LoginButton as an input
        # would hand the function the button's value, not the OAuth profile.
        inputs=[key_in, test_questions_input],
        outputs=[test_results_output],
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=False)