import json
import os
from typing import Any, Dict, List, Optional

import requests

from gaia_agent import GaiaAgent


class GaiaSubmission:
    """Fetch questions from a GAIA-style HTTP API, answer them, and submit.

    The instance owns a ``GaiaAgent`` that is called with the question text
    and is expected to return an ``(answer, reasoning_trace)`` pair. All HTTP
    helpers are best-effort: on failure they print the error and return an
    empty/falsy value instead of raising.
    """

    # Seconds to wait on the API before giving up. ``requests`` has no
    # default timeout, so without one a stalled server blocks forever.
    DEFAULT_TIMEOUT = 30.0

    def __init__(self, api_base_url: str, api_key: Optional[str] = None,
                 timeout: float = DEFAULT_TIMEOUT):
        """Store connection settings and build the request headers.

        Args:
            api_base_url: Root URL of the API; a trailing ``/`` is stripped.
            api_key: Optional token sent as ``Authorization: Bearer <key>``.
            timeout: Timeout in seconds applied to every HTTP request.
        """
        self.api_base_url = api_base_url.rstrip('/')
        self.api_key = api_key
        self.timeout = timeout
        self.agent = GaiaAgent()
        self.headers = {'Content-Type': 'application/json'}

        if api_key:
            self.headers['Authorization'] = f'Bearer {api_key}'

    def get_questions(self) -> List[Dict[str, Any]]:
        """Fetch all questions from the API.

        Returns:
            The decoded JSON body of ``GET <base>/questions``, or ``[]`` if
            the request fails or the body is not valid JSON.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/questions",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching questions: {e}")
            return []

    def get_random_question(self) -> Dict[str, Any]:
        """Fetch one random question from the API.

        Returns:
            The decoded JSON body of ``GET <base>/random-question``, or ``{}``
            if the request fails or the body is not valid JSON.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/random-question",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching random question: {e}")
            return {}

    def download_file(self, task_id: str, file_path: str) -> bool:
        """Download the file associated with a task and write it to disk.

        Args:
            task_id: Task identifier used in ``GET <base>/files/<task_id>``.
            file_path: Local path the response body is written to (binary).

        Returns:
            ``True`` if the file was fetched and written, ``False`` if the
            request or the write failed.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/files/{task_id}",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()

            with open(file_path, 'wb') as f:
                f.write(response.content)

            return True
        except (requests.RequestException, OSError) as e:
            print(f"Error downloading file for task {task_id}: {e}")
            return False

    def submit_answer(self, task_id: str, answer: str, reasoning_trace: str = "") -> Dict[str, Any]:
        """Submit one answer to the API.

        Args:
            task_id: Identifier of the task being answered.
            answer: The answer, sent as ``model_answer``.
            reasoning_trace: Optional explanation sent alongside the answer.

        Returns:
            The decoded JSON body of ``POST <base>/submit``, or
            ``{"error": <message>}`` if the request or decoding fails.
        """
        submission = {
            "task_id": task_id,
            "model_answer": answer,
            "reasoning_trace": reasoning_trace,
        }
        try:
            response = requests.post(
                f"{self.api_base_url}/submit",
                headers=self.headers,
                json=submission,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error submitting answer for task {task_id}: {e}")
            return {"error": str(e)}

    def process_single_question(self, question_data: Dict[str, Any]) -> Dict[str, Any]:
        """Download a question's files, run the agent, and package the result.

        Args:
            question_data: Question record; the keys read are ``task_id``,
                ``question`` and, optionally, ``files`` (an iterable of
                mappings carrying a ``filename`` key).

        Returns:
            A dict with ``task_id``, ``question``, ``answer``,
            ``reasoning_trace`` and ``status``. ``status`` is ``"success"``
            when the agent returned, otherwise ``"error"`` with an empty
            answer and the error message as the reasoning trace.
        """
        task_id = question_data.get('task_id')
        # A key that is present but null must not break the slice below.
        question = question_data.get('question') or ''

        print(f"Processing task {task_id}: {question[:100]}...")

        # ``files`` may be absent or null; both mean "nothing to download".
        for file_info in question_data.get('files') or []:
            file_name = file_info.get('filename')
            if not file_name:
                continue

            # The name comes from the server: keep only its last component so
            # a value such as "../../x" cannot write outside the working dir.
            local_name = os.path.basename(file_name.replace('\\', '/'))
            if local_name and self.download_file(task_id, local_name):
                print(f"Downloaded file: {local_name}")
            else:
                print(f"Failed to download file: {file_name}")

        try:
            answer, reasoning_trace = self.agent(question)
        except Exception as e:
            # Deliberately broad: the agent is arbitrary code and one failing
            # question must not abort a whole evaluation run.
            error_msg = f"Error processing question: {str(e)}"
            print(error_msg)
            return {
                "task_id": task_id,
                "question": question,
                "answer": "",
                "reasoning_trace": error_msg,
                "status": "error",
            }

        print(f"Answer: {answer}")
        return {
            "task_id": task_id,
            "question": question,
            "answer": answer,
            "reasoning_trace": reasoning_trace,
            "status": "success",
        }

    def run_evaluation(self, submit_answers: bool = False) -> List[Dict[str, Any]]:
        """Process every question returned by the API.

        Args:
            submit_answers: When true, each successfully processed answer is
                submitted and the API response is stored on the result under
                ``submission_result``.

        Returns:
            One result dict per question, in the order received; ``[]`` when
            no questions could be retrieved.
        """
        questions = self.get_questions()
        if not questions:
            print("No questions retrieved. Exiting.")
            return []

        total = len(questions)
        print(f"Retrieved {total} questions")
        results = []

        for i, question_data in enumerate(questions, 1):
            print(f"\n--- Question {i}/{total} ---")

            result = self.process_single_question(question_data)
            results.append(result)

            if submit_answers and result['status'] == 'success':
                submission_result = self.submit_answer(
                    result['task_id'],
                    result['answer'],
                    result['reasoning_trace'],
                )
                result['submission_result'] = submission_result
                print(f"Submission result: {submission_result}")

        return results

    def save_results(self, results: List[Dict[str, Any]], filename: str = "gaia_results.json"):
        """Write all results to ``filename`` as indented UTF-8 JSON."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)

        print(f"Results saved to {filename}")

    def save_submission_format(self, results: List[Dict[str, Any]], filename: str = "gaia_submission.jsonl"):
        """Write successful results to ``filename`` as JSON Lines.

        Each line holds ``task_id``, ``model_answer`` and ``reasoning_trace``;
        results whose ``status`` is not ``"success"`` are skipped.
        """
        with open(filename, 'w', encoding='utf-8') as f:
            for result in results:
                if result['status'] != 'success':
                    continue
                submission_entry = {
                    "task_id": result['task_id'],
                    "model_answer": result['answer'],
                    "reasoning_trace": result['reasoning_trace'],
                }
                f.write(json.dumps(submission_entry, ensure_ascii=False) + '\n')

        print(f"Submission file saved to {filename}")