# NOTE: page-extraction residue, not part of the program. The file viewer this
# was copied from reported: Spaces status "Sleeping"; file size 6,254 bytes;
# identifier 2f428df. The viewer's line-number gutter (1-164) has been dropped.
import json
import os
from typing import Any, Dict, List, Optional

import requests

from gaia_agent import GaiaAgent
class GaiaSubmission:
    """Fetch questions from a scoring API, answer them with ``GaiaAgent``, and submit/save results.

    All HTTP helpers are best-effort: they print the error and return an empty
    or error-shaped value instead of raising, so that a batch run in
    ``run_evaluation`` is not aborted by one failing request.
    """

    # Seconds to wait on any single HTTP request. ``requests`` has no default
    # timeout, so without one a stalled server would block the run forever.
    DEFAULT_TIMEOUT = 30.0

    def __init__(self, api_base_url: str, api_key: Optional[str] = None,
                 timeout: float = DEFAULT_TIMEOUT):
        """Configure the API endpoint, optional bearer token and the agent.

        Args:
            api_base_url: Base URL of the API; a trailing ``/`` is stripped.
            api_key: Optional token sent as ``Authorization: Bearer <key>``.
            timeout: Per-request timeout in seconds passed to ``requests``.
        """
        self.api_base_url = api_base_url.rstrip('/')
        self.api_key = api_key
        self.timeout = timeout
        self.agent = GaiaAgent()
        self.headers = {'Content-Type': 'application/json'}
        if api_key:
            self.headers['Authorization'] = f'Bearer {api_key}'

    def get_questions(self) -> List[Dict[str, Any]]:
        """Fetch all questions from ``/questions``.

        Returns:
            The decoded JSON list, or ``[]`` if the request fails or the
            payload is not a list.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/questions",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            questions = response.json()
        except Exception as e:  # deliberate best-effort boundary
            print(f"Error fetching questions: {e}")
            return []
        # A non-list payload (e.g. an error object) would otherwise be
        # iterated element-wise by run_evaluation and crash there.
        if not isinstance(questions, list):
            print(f"Error fetching questions: expected a list, got {type(questions).__name__}")
            return []
        return questions

    def get_random_question(self) -> Dict[str, Any]:
        """Fetch one question from ``/random-question``.

        Returns:
            The decoded JSON object, or ``{}`` if the request fails or the
            payload is not an object.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/random-question",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            question = response.json()
        except Exception as e:  # deliberate best-effort boundary
            print(f"Error fetching random question: {e}")
            return {}
        if not isinstance(question, dict):
            print(f"Error fetching random question: expected an object, got {type(question).__name__}")
            return {}
        return question

    def download_file(self, task_id: str, file_path: str) -> bool:
        """Download the file attached to a task from ``/files/<task_id>``.

        Args:
            task_id: Identifier used to build the download URL.
            file_path: Local path the response body is written to.

        Returns:
            ``True`` if the file was fetched and written, ``False`` otherwise.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/files/{task_id}",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            with open(file_path, 'wb') as f:
                f.write(response.content)
            return True
        except Exception as e:  # covers both HTTP and local I/O failures
            print(f"Error downloading file for task {task_id}: {e}")
            return False

    def submit_answer(self, task_id: str, answer: str, reasoning_trace: str = "") -> Dict[str, Any]:
        """POST an answer to ``/submit``.

        Args:
            task_id: Identifier of the task being answered.
            answer: Sent as ``model_answer``.
            reasoning_trace: Optional explanation sent alongside the answer.

        Returns:
            The decoded JSON response, or ``{"error": <message>}`` on failure.
        """
        submission = {
            "task_id": task_id,
            "model_answer": answer,
            "reasoning_trace": reasoning_trace,
        }
        try:
            response = requests.post(
                f"{self.api_base_url}/submit",
                headers=self.headers,
                json=submission,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:  # deliberate best-effort boundary
            print(f"Error submitting answer for task {task_id}: {e}")
            return {"error": str(e)}

    def process_single_question(self, question_data: Dict[str, Any]) -> Dict[str, Any]:
        """Download any attached files, then run the agent on one question.

        Args:
            question_data: Mapping read for ``task_id``, ``question`` and an
                optional ``files`` list of ``{"filename": ...}`` entries.

        Returns:
            A dict with ``task_id``, ``question``, ``answer``,
            ``reasoning_trace`` and ``status`` (``"success"`` or ``"error"``).
        """
        task_id = question_data.get('task_id')
        # ``or ''`` also covers an explicit null, which would break slicing.
        question = question_data.get('question') or ''
        print(f"Processing task {task_id}: {question[:100]}...")

        # Fetch associated files; a missing or null ``files`` means none.
        for file_info in question_data.get('files') or []:
            raw_name = file_info.get('filename') if isinstance(file_info, dict) else None
            if not raw_name:
                continue
            # The name comes from the remote API: keep only the final path
            # component so it cannot point outside the working directory.
            file_name = os.path.basename(str(raw_name).replace('\\', '/'))
            if not file_name or file_name in ('.', '..'):
                print(f"Skipping unsafe file name: {raw_name!r}")
                continue
            if self.download_file(task_id, file_name):
                print(f"Downloaded file: {file_name}")
            else:
                print(f"Failed to download file: {file_name}")

        # Run the agent; any failure is reported as an error result instead
        # of propagating, so batch runs continue with the next question.
        try:
            answer, reasoning_trace = self.agent(question)
        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(error_msg)
            return {
                "task_id": task_id,
                "question": question,
                "answer": "",
                "reasoning_trace": error_msg,
                "status": "error",
            }

        print(f"Answer: {answer}")
        return {
            "task_id": task_id,
            "question": question,
            "answer": answer,
            "reasoning_trace": reasoning_trace,
            "status": "success",
        }

    def run_evaluation(self, submit_answers: bool = False) -> List[Dict[str, Any]]:
        """Process every question returned by ``get_questions``.

        Args:
            submit_answers: When true, each successful answer is submitted and
                the API response is stored under ``submission_result``.

        Returns:
            One result dict per question, or ``[]`` if none were retrieved.
        """
        questions = self.get_questions()
        if not questions:
            print("No questions retrieved. Exiting.")
            return []

        total = len(questions)
        print(f"Retrieved {total} questions")

        results = []
        for i, question_data in enumerate(questions, 1):
            print(f"\n--- Question {i}/{total} ---")
            result = self.process_single_question(question_data)
            results.append(result)

            # Submit only answers the agent actually produced.
            if submit_answers and result['status'] == 'success':
                submission_result = self.submit_answer(
                    result['task_id'],
                    result['answer'],
                    result['reasoning_trace'],
                )
                result['submission_result'] = submission_result
                print(f"Submission result: {submission_result}")
        return results

    def save_results(self, results: List[Dict[str, Any]], filename: str = "gaia_results.json"):
        """Write all results to ``filename`` as indented UTF-8 JSON."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        print(f"Results saved to {filename}")

    def save_submission_format(self, results: List[Dict[str, Any]], filename: str = "gaia_submission.jsonl"):
        """Write successful results to ``filename`` as JSON Lines.

        Each line holds ``task_id``, ``model_answer`` and ``reasoning_trace``.
        Results whose ``status`` is missing or not ``"success"`` are skipped.
        """
        with open(filename, 'w', encoding='utf-8') as f:
            for result in results:
                if result.get('status') != 'success':
                    continue
                submission_entry = {
                    "task_id": result['task_id'],
                    "model_answer": result['answer'],
                    "reasoning_trace": result['reasoning_trace'],
                }
                f.write(json.dumps(submission_entry, ensure_ascii=False) + '\n')
        print(f"Submission file saved to {filename}")