Yago Bolivar
feat: add GAIA Agent and local testing scripts, including setup and requirements for development
2abc50d
# /Users/yagoairm2/Desktop/agents/final project/HF_Agents_Final_Project/app_local.py
"""
A simplified version of app2.py that works better for local development.
This version doesn't require OAuth authentication and uses a test username instead.
"""
import os | |
import sys | |
import gradio as gr | |
import requests | |
import pandas as pd | |
import json | |
import re | |
import time | |
import logging | |
import io | |
import contextlib | |
from typing import Dict, List, Optional, Union, Any | |
from pathlib import Path | |
try:
    from dotenv import load_dotenv
except ImportError:
    print("dotenv not found. Using os.environ only.")

    def load_dotenv(*args, **kwargs):
        """Stand-in used when python-dotenv is not installed.

        Accepts and ignores any positional or keyword arguments so that call
        sites written for the real ``dotenv.load_dotenv`` (which takes a path,
        ``override=...`` and so on) keep working unchanged.

        Returns:
            Always ``False``: no environment variables were loaded.
        """
        return False
# Logging setup: INFO and above go to a stream handler, each record rendered
# as "<timestamp> - <level> - <message>".
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the rest of this file.
logger = logging.getLogger(__name__)
# --- Constants ---
# Base URL of the scoring service; run_and_submit_all() derives its
# "/questions" endpoint from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Presumably the directory holding task attachment files; it is not referenced
# anywhere in the visible part of this file -- TODO confirm before relying on it.
DEFAULT_FILES_DIR = "dataset"
# Prompt text describing the "FINAL ANSWER: ..." reply format. MockAgent does
# not use it; it is kept here verbatim (do not edit the wording, it is runtime text).
SYSTEM_PROMPT = """
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
"""
# --- Mock Agent Implementation --- | |
class MockAgent:
    """Stand-in agent that replies with canned text chosen by simple keyword checks."""

    def __init__(self):
        logger.info("Initializing Mock Agent")

    def __call__(self, question: str) -> str:
        """Pick a canned reply for *question* and return it.

        The first matching rule wins: a case-insensitive "how many", then a
        case-insensitive "what is", then any question mark, then a generic
        fallback.
        """
        logger.info(f"Mock Agent received question: {question[:50]}...")

        lowered = question.lower()
        # Ordered (phrase, reply) pairs checked against the lower-cased text.
        phrase_rules = (
            ("how many", "42"),
            ("what is", "Example answer for a what-is question"),
        )
        answer = next(
            (reply for phrase, reply in phrase_rules if phrase in lowered),
            None,
        )
        if answer is None:
            if "?" in question:
                answer = "Yes, that is correct."
            else:
                answer = "This is a mock answer for testing purposes."

        logger.info(f"Mock Agent returning answer: {answer}")
        return answer
# --- Runner Function for Gradio Interface --- | |
def run_and_submit_all(test_username: str = "test_user"):
    """
    Load a few questions, run the mock agent on them, and report the answers.

    Despite its name (kept so the Gradio wiring and the hosted app stay in
    step), this local version never submits anything to the scoring API.

    Questions are read from ``question_set/common_questions.json`` when that
    file exists, otherwise fetched from ``{DEFAULT_API_URL}/questions``. Only
    the first three questions are processed to keep local runs short.

    Args:
        test_username: Name shown in the status message. ``None``, empty, or
            whitespace-only values fall back to ``"test_user"``.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with "Task ID", "Question" and "Submitted Answer" columns,
        or ``None`` when the agent could not be created or the questions
        could not be loaded.
    """
    # Normalise once so every later use sees the same, non-blank name.
    test_username = (test_username or "").strip() or "test_user"
    print(f"Using test username: {test_username}")

    questions_url = f"{DEFAULT_API_URL}/questions"
    questions_file = "question_set/common_questions.json"
    max_questions = 3  # local runs only need a small sample

    # 1. Instantiate Agent
    try:
        agent = MockAgent()  # Use the mock agent for testing
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # 2. Load questions (local file preferred: faster and works offline)
    from_local_file = os.path.exists(questions_file)
    if from_local_file:
        print(f"Using local questions file: {questions_file}")
        try:
            # JSON is UTF-8 by specification; do not depend on the locale default.
            with open(questions_file, "r", encoding="utf-8") as f:
                questions_data = json.load(f)
        except Exception as e:
            print(f"Error loading questions from local file: {e}")
            return f"Error loading questions from local file: {e}", None
        empty_message = "Local questions file is empty or invalid format."
    else:
        print(f"Fetching questions from: {questions_url}")
        try:
            response = requests.get(questions_url, timeout=15)
            response.raise_for_status()
            questions_data = response.json()
        except Exception as e:
            print(f"Error fetching questions: {e}")
            return f"Error fetching questions: {e}", None
        empty_message = "Fetched questions list is empty or invalid format."

    # Both sources must yield a non-empty JSON array before we slice/iterate.
    if not isinstance(questions_data, list) or not questions_data:
        print(empty_message)
        return empty_message, None

    if from_local_file:
        print(f"Loaded {len(questions_data)} questions from local file.")
    else:
        print(f"Fetched {len(questions_data)} questions.")

    # For testing, limit to just a few questions
    questions_data = questions_data[:max_questions]
    print(f"Limited to first {len(questions_data)} questions for testing.")

    # 3. Run Agent
    results_log = []
    answered_count = 0
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        if not isinstance(item, dict):
            print("Skipping item with missing task_id or question")
            continue
        task_id = item.get("task_id")
        # Accept both spellings of the key: the local file uses "Question",
        # while the remote /questions payload uses lowercase "question".
        # Reading only one would silently skip every item from the other source.
        question_text = item.get("Question")
        if question_text is None:
            question_text = item.get("question")
        if not task_id or question_text is None:
            print("Skipping item with missing task_id or question")
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": f"AGENT ERROR: {e}",
                }
            )
        else:
            answered_count += 1
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": submitted_answer,
                }
            )

    if not answered_count:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Report (no submission payload is built: nothing is sent in local mode)
    status_update = f"Agent finished. Prepared {answered_count} answers for user '{test_username}'..."
    print(status_update)

    # 5. Show Results (but don't submit in local testing mode)
    print("In local development mode - showing results without submitting")
    final_status = (
        f"Local Testing Complete!\n"
        f"User: {test_username}\n"
        f"Generated {answered_count} answers\n"
        f"Message: This is a local test - no answers were submitted to the API"
    )
    results_df = pd.DataFrame(results_log)
    return final_status, results_df
# --- Simple Gradio Interface --- | |
# Build the page. Components appear in the order they are created inside the
# Blocks context, so the statement order below is the on-screen order.
with gr.Blocks() as demo:
    # Heading followed by a short description of this local-testing page.
    gr.Markdown("# GAIA Agent Local Testing Interface")
    gr.Markdown(
        """
        **Local Development Version**
        This is a simplified version of the agent interface for local testing.
        It uses a mock agent implementation that returns test answers.
        Enter a username below and click the button to run the agent on a few sample questions.
        """
    )
    # Input: the username handed to run_and_submit_all (pre-filled with "test_user").
    test_username = gr.Textbox(label="Test Username", value="test_user")
    run_button = gr.Button("Run Test Evaluation")
    # Outputs: a read-only status box and a table of per-question results.
    status_output = gr.Textbox(label="Run Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # Clicking the button calls run_and_submit_all with the textbox value and
    # routes its two return values to the status box and the table, in that order.
    run_button.click(
        fn=run_and_submit_all,
        inputs=[test_username],
        outputs=[status_output, results_table]
    )
if __name__ == "__main__":
    # Banner: the title framed by thirty dashes on each side, after a blank line.
    rule = "-" * 30
    print(f"\n{rule} GAIA Agent Local Testing {rule}")

    # Loading a .env file is optional; the call is a no-op stub when
    # python-dotenv is not installed.
    load_dotenv()

    print("Launching Gradio Interface for local testing...")
    demo.launch(share=False, debug=True)