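# NOTE: "Spaces: Sleeping / Sleeping" appears to be page-header residue captured
# together with this file; it is not part of the program.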
import json
import os
from typing import Any, Dict, List, Optional

import requests

from gaia_agent import GaiaAgent
class GaiaSubmission:
    """Fetch questions from an HTTP API, answer them with ``GaiaAgent``, and submit/save results.

    All HTTP helpers are best-effort: on a transport, HTTP-status, or JSON
    decoding failure they print a message and return an "empty" value instead
    of raising, so a long evaluation run is not aborted by one bad request.
    """

    def __init__(self, api_base_url: str, api_key: Optional[str] = None, timeout: float = 30.0):
        """Create a client.

        Args:
            api_base_url: Base URL of the API; a trailing ``/`` is stripped.
            api_key: Optional token, sent as ``Authorization: Bearer <key>``.
            timeout: Seconds to wait on each HTTP request before giving up.
                Without a timeout ``requests`` may block indefinitely.
        """
        self.api_base_url = api_base_url.rstrip('/')
        self.api_key = api_key
        self.timeout = timeout
        self.agent = GaiaAgent()
        self.headers = {'Content-Type': 'application/json'}
        if api_key:
            self.headers['Authorization'] = f'Bearer {api_key}'

    def _request(self, method: str, path: str, **kwargs: Any):
        """Send one request to ``api_base_url + path`` and return the response.

        Applies the shared headers and timeout, and raises
        ``requests.HTTPError`` for 4xx/5xx responses.
        """
        response = requests.request(
            method,
            f"{self.api_base_url}{path}",
            headers=self.headers,
            timeout=self.timeout,
            **kwargs,
        )
        response.raise_for_status()
        return response

    def get_questions(self) -> List[Dict[str, Any]]:
        """Fetch all questions from the API; return ``[]`` on failure."""
        try:
            return self._request("GET", "/questions").json()
        # ValueError covers a body that is not valid JSON.
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching questions: {e}")
            return []

    def get_random_question(self) -> Dict[str, Any]:
        """Fetch one random question from the API; return ``{}`` on failure."""
        try:
            return self._request("GET", "/random-question").json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching random question: {e}")
            return {}

    def download_file(self, task_id: str, file_path: str) -> bool:
        """Download the file associated with ``task_id`` and write it to ``file_path``.

        Returns:
            ``True`` if the file was fetched and written, ``False`` otherwise.
        """
        try:
            response = self._request("GET", f"/files/{task_id}")
            with open(file_path, 'wb') as f:
                f.write(response.content)
            return True
        # OSError covers failures opening/writing the local file.
        except (requests.RequestException, OSError) as e:
            print(f"Error downloading file for task {task_id}: {e}")
            return False

    def submit_answer(self, task_id: str, answer: str, reasoning_trace: str = "") -> Dict[str, Any]:
        """Submit an answer to the API.

        Returns:
            The decoded JSON response, or ``{"error": <message>}`` on failure.
        """
        submission = {
            "task_id": task_id,
            "model_answer": answer,
            "reasoning_trace": reasoning_trace,
        }
        try:
            return self._request("POST", "/submit", json=submission).json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error submitting answer for task {task_id}: {e}")
            return {"error": str(e)}

    def process_single_question(self, question_data: Dict[str, Any]) -> Dict[str, Any]:
        """Download any attached files, run the agent on the question, and build a result record.

        Args:
            question_data: Mapping read for ``task_id``, ``question`` and an
                optional ``files`` list of mappings with a ``filename`` key.

        Returns:
            A dict with ``task_id``, ``question``, ``answer``,
            ``reasoning_trace`` and ``status`` (``"success"`` or ``"error"``).
        """
        task_id = question_data.get('task_id')
        # `or ''` also covers a key that is present but null.
        question = question_data.get('question') or ''
        print(f"Processing task {task_id}: {question[:100]}...")

        # Download associated files, if any. A null/missing list means none.
        for file_info in question_data.get('files') or []:
            file_name = file_info.get('filename')
            if not file_name:
                continue
            # The name comes from the server: keep only its final component so
            # something like "../../x" cannot write outside the working directory.
            safe_name = os.path.basename(str(file_name).replace('\\', '/'))
            if safe_name in ('', '.', '..'):
                print(f"Failed to download file: {file_name}")
                continue
            if self.download_file(task_id, safe_name):
                print(f"Downloaded file: {safe_name}")
            else:
                print(f"Failed to download file: {safe_name}")

        # Run the agent. This is a deliberate catch-all boundary: any agent
        # failure is recorded in the result rather than aborting the run.
        try:
            answer, reasoning_trace = self.agent(question)
        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(error_msg)
            return {
                "task_id": task_id,
                "question": question,
                "answer": "",
                "reasoning_trace": error_msg,
                "status": "error",
            }

        print(f"Answer: {answer}")
        return {
            "task_id": task_id,
            "question": question,
            "answer": answer,
            "reasoning_trace": reasoning_trace,
            "status": "success",
        }

    def run_evaluation(self, submit_answers: bool = False) -> List[Dict[str, Any]]:
        """Process every question returned by ``get_questions``.

        Args:
            submit_answers: When true, each successful answer is also sent via
                ``submit_answer`` and the response is stored under
                ``submission_result`` in that result.

        Returns:
            One result dict per question, or ``[]`` if no questions were retrieved.
        """
        questions = self.get_questions()
        if not questions:
            print("No questions retrieved. Exiting.")
            return []

        print(f"Retrieved {len(questions)} questions")
        results = []
        for i, question_data in enumerate(questions, 1):
            print(f"\n--- Question {i}/{len(questions)} ---")
            result = self.process_single_question(question_data)
            results.append(result)

            # Submit only answers the agent actually produced.
            if submit_answers and result['status'] == 'success':
                submission_result = self.submit_answer(
                    result['task_id'],
                    result['answer'],
                    result['reasoning_trace'],
                )
                result['submission_result'] = submission_result
                print(f"Submission result: {submission_result}")
        return results

    def save_results(self, results: List[Dict[str, Any]], filename: str = "gaia_results.json"):
        """Write all results to ``filename`` as indented UTF-8 JSON."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        print(f"Results saved to {filename}")

    def save_submission_format(self, results: List[Dict[str, Any]], filename: str = "gaia_submission.jsonl"):
        """Write successful results to ``filename`` as JSON Lines.

        Each line holds ``task_id``, ``model_answer`` and ``reasoning_trace``;
        results whose status is not ``"success"`` are omitted.
        """
        with open(filename, 'w', encoding='utf-8') as f:
            for result in results:
                if result['status'] == 'success':
                    submission_entry = {
                        "task_id": result['task_id'],
                        "model_answer": result['answer'],
                        "reasoning_trace": result['reasoning_trace'],
                    }
                    f.write(json.dumps(submission_entry, ensure_ascii=False) + '\n')
        print(f"Submission file saved to {filename}")