import json
import os
from typing import Any, Dict, List, Optional

import requests

from gaia_agent import GaiaAgent


class GaiaSubmission:
    """Client that fetches questions from an HTTP API, answers them with a
    ``GaiaAgent`` and optionally submits / saves the answers.

    All network helpers are best-effort: failures are printed and reported
    through a sentinel return value (``[]``, ``{}``, ``False`` or an
    ``{"error": ...}`` dict) instead of raising.
    """

    def __init__(self, api_base_url: str, api_key: Optional[str] = None,
                 timeout: float = 30.0):
        """Set up the client.

        Args:
            api_base_url: Base URL of the API; a trailing ``/`` is stripped.
            api_key: Optional token; when truthy it is sent as a
                ``Bearer`` ``Authorization`` header.
            timeout: Seconds passed as ``timeout`` to every HTTP request so
                that a stalled connection cannot block the run forever.
        """
        self.api_base_url = api_base_url.rstrip('/')
        self.api_key = api_key
        self.timeout = timeout
        self.agent = GaiaAgent()
        self.headers = {'Content-Type': 'application/json'}
        if api_key:
            self.headers['Authorization'] = f'Bearer {api_key}'

    def _get(self, path: str) -> requests.Response:
        """Issue a GET for ``path`` (relative to the base URL) and raise on
        an HTTP error status."""
        response = requests.get(
            f"{self.api_base_url}/{path}",
            headers=self.headers,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response

    def get_questions(self) -> List[Dict[str, Any]]:
        """Fetch all questions from the API.

        Returns:
            The decoded JSON list, or ``[]`` when the request fails or the
            payload is not a list.
        """
        try:
            payload = self._get("questions").json()
        except Exception as e:
            print(f"Error fetching questions: {e}")
            return []
        if not isinstance(payload, list):
            # run_evaluation iterates the result; a non-list payload would
            # otherwise fail later in a much less obvious place.
            print(
                "Error fetching questions: expected a JSON list, got "
                f"{type(payload).__name__}"
            )
            return []
        return payload

    def get_random_question(self) -> Dict[str, Any]:
        """Fetch one random question.

        Returns:
            The decoded JSON response, or ``{}`` when the request fails.
        """
        try:
            return self._get("random-question").json()
        except Exception as e:
            print(f"Error fetching random question: {e}")
            return {}

    def download_file(self, task_id: str, file_path: str) -> bool:
        """Download the file associated with a task and write it to disk.

        Args:
            task_id: Task whose file is requested from ``/files/{task_id}``.
            file_path: Local path the response body is written to (binary).

        Returns:
            ``True`` on success, ``False`` if the request or the write fails.
        """
        try:
            response = self._get(f"files/{task_id}")
            with open(file_path, 'wb') as f:
                f.write(response.content)
            return True
        except Exception as e:
            print(f"Error downloading file for task {task_id}: {e}")
            return False

    def submit_answer(self, task_id: str, answer: str,
                      reasoning_trace: str = "") -> Dict[str, Any]:
        """Submit an answer to the API.

        Args:
            task_id: Identifier of the task being answered.
            answer: Sent as ``model_answer``.
            reasoning_trace: Optional trace sent alongside the answer.

        Returns:
            The decoded JSON response, or ``{"error": <message>}`` when the
            request fails.
        """
        submission = {
            "task_id": task_id,
            "model_answer": answer,
            "reasoning_trace": reasoning_trace,
        }
        try:
            response = requests.post(
                f"{self.api_base_url}/submit",
                headers=self.headers,
                json=submission,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"Error submitting answer for task {task_id}: {e}")
            return {"error": str(e)}

    def process_single_question(self, question_data: Dict[str, Any]) -> Dict[str, Any]:
        """Process a single question: download its files, then ask the agent.

        Args:
            question_data: Question record; the keys ``task_id``,
                ``question`` and (optionally) ``files`` are read.

        Returns:
            A dict with ``task_id``, ``question``, ``answer``,
            ``reasoning_trace`` and ``status`` (``"success"`` or
            ``"error"``).
        """
        task_id = question_data.get('task_id')
        # `or ''` also covers an explicit null so the slice below is safe.
        question = question_data.get('question') or ''

        print(f"Processing task {task_id}: {question[:100]}...")

        # Check whether there are associated files. `or []` tolerates a
        # missing key as well as an explicit null.
        for file_info in question_data.get('files') or []:
            if not isinstance(file_info, dict):
                continue
            file_name = file_info.get('filename')
            if not file_name:
                continue
            # The name comes from the remote API: keep only its final
            # component so it cannot point outside the working directory.
            local_name = os.path.basename(str(file_name))
            if not local_name:
                print(f"Failed to download file: {file_name}")
                continue
            if self.download_file(task_id, local_name):
                print(f"Downloaded file: {local_name}")
            else:
                print(f"Failed to download file: {local_name}")

        # Process the question with the agent. The agent is opaque here, so
        # any failure is converted into an "error" result.
        try:
            answer, reasoning_trace = self.agent(question)
        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(error_msg)
            return {
                "task_id": task_id,
                "question": question,
                "answer": "",
                "reasoning_trace": error_msg,
                "status": "error",
            }

        print(f"Answer: {answer}")
        return {
            "task_id": task_id,
            "question": question,
            "answer": answer,
            "reasoning_trace": reasoning_trace,
            "status": "success",
        }

    def run_evaluation(self, submit_answers: bool = False) -> List[Dict[str, Any]]:
        """Run the evaluation over every question returned by the API.

        Args:
            submit_answers: When true, each successfully processed answer is
                submitted and the API reply is stored under
                ``submission_result`` in that result.

        Returns:
            One result dict per question, or ``[]`` if none were retrieved.
        """
        questions = self.get_questions()
        if not questions:
            print("No questions retrieved. Exiting.")
            return []

        print(f"Retrieved {len(questions)} questions")

        results = []
        for i, question_data in enumerate(questions, 1):
            print(f"\n--- Question {i}/{len(questions)} ---")

            result = self.process_single_question(question_data)
            results.append(result)

            # Submit the answer if submission is enabled.
            if submit_answers and result['status'] == 'success':
                submission_result = self.submit_answer(
                    result['task_id'],
                    result['answer'],
                    result['reasoning_trace'],
                )
                result['submission_result'] = submission_result
                print(f"Submission result: {submission_result}")

        return results

    def save_results(self, results: List[Dict[str, Any]],
                     filename: str = "gaia_results.json"):
        """Save all results to ``filename`` as indented UTF-8 JSON."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        print(f"Results saved to {filename}")

    def save_submission_format(self, results: List[Dict[str, Any]],
                               filename: str = "gaia_submission.jsonl"):
        """Save successful results as JSON Lines in the submission format.

        Only results whose ``status`` is ``"success"`` are written; each
        line holds ``task_id``, ``model_answer`` and ``reasoning_trace``.
        """
        with open(filename, 'w', encoding='utf-8') as f:
            for result in results:
                if result['status'] != 'success':
                    continue
                submission_entry = {
                    "task_id": result['task_id'],
                    "model_answer": result['answer'],
                    "reasoning_trace": result['reasoning_trace'],
                }
                f.write(json.dumps(submission_entry, ensure_ascii=False) + '\n')
        print(f"Submission file saved to {filename}")