# File size: 3,792 Bytes
# 6ca25ff
import os
from typing import Any, Dict, List, Tuple
import pandas as pd
import requests
def fetch_all_questions(timeout: float = 30.0) -> List[Dict[str, Any]]:
    """Fetch all questions from the GAIA benchmark API.

    The base URL is read from the ``GAIA_API_URL`` environment variable and
    the questions are requested with a GET to ``<base URL>/questions``.

    Args:
        timeout: Seconds to wait for the server before the request is
            abandoned.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If ``GAIA_API_URL`` is unset or empty, the request fails,
            the server answers with an error status, or the body cannot be
            decoded as JSON. The underlying error is chained as
            ``__cause__``.
    """
    try:
        api_url = os.getenv("GAIA_API_URL", "")
        if not api_url:
            raise ValueError("GAIA_API_URL environment variable not set")

        # A trailing slash on the configured base URL would otherwise yield
        # a "//questions" path.
        base_url = api_url.rstrip("/")

        # requests applies no timeout unless one is given, so an
        # unresponsive server would block the caller indefinitely.
        response = requests.get(f"{base_url}/questions", timeout=timeout)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        # Chain the original error so its type and traceback stay available.
        raise Exception(f"Failed to fetch questions: {str(e)}") from e
def run_agent(agent: Any, questions: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Run the agent on all questions and collect results.

    Each question is passed to ``agent.get_answer``. An exception raised
    while answering one question is recorded as a failed entry and does not
    stop the remaining questions from being processed.

    Args:
        agent: Object exposing ``get_answer(question)``.
        questions: List of question dicts; the ``"id"`` and ``"question"``
            keys are read, defaulting to ``"unknown"`` and ``""``.

    Returns:
        Tuple containing:
        - List of result logs for display, one entry per question, with the
          keys ``"Question ID"``, ``"Question"``, ``"Answer"`` and
          ``"Status"``.
        - List of answer payloads for submission, one
          ``{"question_id", "answer"}`` entry per question that produced an
          answer.
    """
    results_log = []
    answers_payload = []

    for question in questions:
        question_id = question.get("id", "unknown")
        question_text = question.get("question", "")

        try:
            answer = agent.get_answer(question)
            # A numeric zero is a legitimate answer even though it is falsy;
            # None, "", False and empty containers still count as missing.
            is_number = isinstance(answer, (int, float)) and not isinstance(answer, bool)
            has_answer = bool(answer) or is_number
        except Exception as e:
            results_log.append({
                "Question ID": question_id,
                "Question": question_text,
                "Answer": f"Error: {str(e)}",
                "Status": "Failed",
            })
            continue

        results_log.append({
            "Question ID": question_id,
            "Question": question_text,
            "Answer": answer if has_answer else "No answer provided",
            "Status": "Success" if has_answer else "Failed",
        })

        # Only questions that produced an answer are submitted.
        if has_answer:
            answers_payload.append({
                "question_id": question_id,
                "answer": answer,
            })

    return results_log, answers_payload
def submit_answers(
    submission_data: Dict[str, Any],
    results_log: List[Dict[str, Any]],
    timeout: float = 60.0,
) -> Tuple[str, pd.DataFrame]:
    """Submit answers to the GAIA benchmark API.

    The base URL is read from the ``GAIA_API_URL`` environment variable and
    ``submission_data`` is sent as the JSON body of a POST to
    ``<base URL>/submit``. A failed submission is reported through the
    returned status message instead of an exception, so the results can
    still be displayed.

    Args:
        submission_data: Dictionary containing submission details.
        results_log: List of result logs for display.
        timeout: Seconds to wait for the server before the request is
            abandoned.

    Returns:
        Tuple containing:
        - Status message string: a success message, or
          ``"Error submitting answers: ..."`` describing the failure.
        - DataFrame of results for display, built from ``results_log``.
    """
    # The table is returned on both the success and the failure path.
    results_df = pd.DataFrame(results_log)

    try:
        api_url = os.getenv("GAIA_API_URL", "")
        if not api_url:
            raise ValueError("GAIA_API_URL environment variable not set")

        # A trailing slash on the configured base URL would otherwise yield
        # a "//submit" path.
        base_url = api_url.rstrip("/")

        # requests applies no timeout unless one is given, so an
        # unresponsive server would block the caller indefinitely.
        response = requests.post(
            f"{base_url}/submit",
            json=submission_data,
            timeout=timeout,
        )
        response.raise_for_status()
    except Exception as e:
        # If submission fails, still show results but with error message.
        return f"Error submitting answers: {str(e)}", results_df

    return "Answers submitted successfully!", results_df