import base64
import io
import os
from typing import Optional, Tuple

import pandas as pd
import requests
from openai import OpenAI


class GaiaAgent:
    """Answer GAIA questions with OpenAI models, using an optional task file.

    When a ``task_id`` is given, the attachment is downloaded from the
    scoring API and handled according to its ``Content-Type``:

    * image  -> sent to the vision model as a base64 data URL
    * audio  -> transcribed, transcript appended to the prompt
    * python -> executed; the value of a top-level ``result`` is reported
    * text / csv -> first ``MAX_TEXT_CONTEXT`` characters put in the prompt
    * pdf    -> placeholder note only (no extraction implemented)
    """

    TEXT_MODEL = "gpt-4-turbo"
    VISION_MODEL = "gpt-4o"
    TRANSCRIPTION_MODEL = "whisper-1"
    # Maximum number of characters of a text/CSV attachment put in the prompt.
    MAX_TEXT_CONTEXT = 2000

    # File extension used to name audio uploads, keyed by MIME type.
    _AUDIO_EXTENSIONS = {
        "audio/mpeg": "mp3",
        "audio/mp3": "mp3",
        "audio/wav": "wav",
        "audio/x-wav": "wav",
        "audio/wave": "wav",
        "audio/mp4": "m4a",
        "audio/x-m4a": "m4a",
        "audio/ogg": "ogg",
        "audio/webm": "webm",
        "audio/flac": "flac",
    }

    def __init__(self):
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.instructions = (
            "You are a multimodal GAIA assistant capable of understanding text, images, audio, and code. "
            "Use file context if provided, think step by step, and respond with the exact answer only."
        )
        self.api_url = "https://agents-course-unit4-scoring.hf.space"

    def fetch_file(self, task_id: str) -> Tuple[Optional[str], Optional[bytes], str]:
        """Download the attachment belonging to ``task_id``.

        Returns:
            ``(url, content, content_type)`` on success. On any request
            failure (connection error, timeout, non-2xx status) returns
            ``(None, None, "[Fetch error: ...]")`` instead of raising, so the
            third element doubles as the error message.
        """
        url = f"{self.api_url}/files/{task_id}"
        try:
            response = requests.get(url, timeout=15)
            response.raise_for_status()
            content_type = response.headers.get("Content-Type", "")
            return url, response.content, content_type
        except requests.RequestException as e:
            return None, None, f"[Fetch error: {e}]"

    @staticmethod
    def _run_python(source: bytes) -> str:
        """Execute a Python attachment and describe its outcome as prompt context."""
        try:
            # SECURITY: this executes code downloaded from the scoring API with
            # full interpreter privileges. Only use against a trusted endpoint,
            # or move execution into a sandbox.
            #
            # A single namespace is used on purpose: with separate globals and
            # locals, exec() runs the code as if inside a class body, so
            # top-level functions cannot see each other or top-level variables.
            namespace = {}
            exec(source.decode("utf-8"), namespace)
            result = namespace.get(
                "result", "[Executed. Check code return value manually if needed.]"
            )
            return f"Python result: {result}"
        except Exception as e:
            return f"[Python execution error: {e}]"

    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
        """Answer ``question``, consulting the task's attachment when available.

        Args:
            question: The question text.
            task_id: Optional task identifier used to fetch an attachment.

        Returns:
            The model's answer with surrounding whitespace stripped, or
            ``"[Agent error: ...]"`` if any OpenAI call fails.
        """
        image = None
        image_mime = "image/png"
        audio = None
        audio_name = "audio.mp3"
        tool_context = ""

        if task_id:
            _, file_bytes, file_type = self.fetch_file(task_id)
            # Drop parameters such as "; charset=utf-8" before matching.
            mime = file_type.split(";", 1)[0].strip().lower()
            if file_bytes is None:
                tool_context = file_type  # error message
            elif "image" in mime:
                image = base64.b64encode(file_bytes).decode("utf-8")
                if mime.startswith("image/"):
                    # Label the data URL with the real type, not always PNG.
                    image_mime = mime
            elif "audio" in mime:
                audio = file_bytes
                audio_name = f"audio.{self._AUDIO_EXTENSIONS.get(mime, 'mp3')}"
            elif mime.endswith("python"):
                tool_context = self._run_python(file_bytes)
            elif "text" in mime or "csv" in mime:
                # errors="replace": a non-UTF-8 file must not crash the agent.
                text = file_bytes.decode("utf-8", errors="replace")
                tool_context = text[: self.MAX_TEXT_CONTEXT]
            elif "pdf" in mime:
                tool_context = "[PDF file detected. OCR not yet implemented.]"

        messages = [
            {"role": "system", "content": self.instructions},
            {"role": "user", "content": f"{tool_context}\n\nQUESTION: {question}\nANSWER:"},
        ]

        try:
            if image:
                response = self.client.chat.completions.create(
                    model=self.VISION_MODEL,
                    messages=[
                        {"role": "system", "content": self.instructions},
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": question},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{image_mime};base64,{image}",
                                        "detail": "auto",
                                    },
                                },
                            ],
                        },
                    ],
                )
            else:
                if audio:
                    # The upload's filename comes from the buffer's ``name``;
                    # a bare BytesIO has none, so no extension would be sent
                    # to identify the audio format.
                    audio_file = io.BytesIO(audio)
                    audio_file.name = audio_name
                    transcript = self.client.audio.transcriptions.create(
                        model=self.TRANSCRIPTION_MODEL,
                        file=audio_file,
                        response_format="text",
                    )
                    messages.append(
                        {"role": "user", "content": f"Transcript: {transcript.strip()}"}
                    )
                response = self.client.chat.completions.create(
                    model=self.TEXT_MODEL,
                    messages=messages,
                    temperature=0.0,
                )

            # ``content`` may be None (e.g. a refusal); treat it as empty.
            return (response.choices[0].message.content or "").strip()
        except Exception as e:
            return f"[Agent error: {e}]"