dawid-lorek's picture
Update agent.py
9eb69da verified
raw
history blame
4.08 kB
import base64
import io
import os
from typing import Optional, Tuple

import pandas as pd
import requests
from openai import OpenAI
class GaiaAgent:
    """Answer GAIA benchmark questions with OpenAI models.

    When a task id is supplied, the file attached to that task is downloaded
    from the scoring API and turned into extra context for the model:
    images are sent to a vision model, audio is transcribed first, Python
    files are executed, and text/CSV files are quoted (truncated).
    """

    # Maximum number of characters of a text/CSV attachment put in the prompt.
    MAX_TEXT_CONTEXT_CHARS = 2000

    # Media type -> file extension used to name audio uploads. The
    # transcription endpoint relies on the file name to detect the format.
    _AUDIO_EXTENSIONS = {
        "audio/mpeg": "mp3",
        "audio/mp3": "mp3",
        "audio/wav": "wav",
        "audio/wave": "wav",
        "audio/x-wav": "wav",
        "audio/mp4": "m4a",
        "audio/x-m4a": "m4a",
        "audio/ogg": "ogg",
        "audio/flac": "flac",
        "audio/webm": "webm",
    }

    def __init__(self):
        """Create the OpenAI client (key read from ``OPENAI_API_KEY``)."""
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.instructions = (
            "You are a multimodal GAIA assistant capable of understanding text, images, audio, and code. "
            "Use file context if provided, think step by step, and respond with the exact answer only."
        )
        self.api_url = "https://agents-course-unit4-scoring.hf.space"

    def fetch_file(self, task_id: str) -> Tuple[Optional[str], Optional[bytes], str]:
        """Download the file attached to *task_id*.

        Returns:
            ``(url, content, content_type)`` on success, or
            ``(None, None, "[Fetch error: ...]")`` when the request fails.
            Note that the third element doubles as the error message.
        """
        url = f"{self.api_url}/files/{task_id}"
        try:
            response = requests.get(url, timeout=15)
            response.raise_for_status()
            content_type = response.headers.get("Content-Type", "")
            return url, response.content, content_type
        except requests.RequestException as e:
            return None, None, f"[Fetch error: {e}]"

    @staticmethod
    def _media_type(content_type: str) -> str:
        """Return the lower-cased media type without parameters such as charset."""
        return content_type.split(";", 1)[0].strip().lower()

    @classmethod
    def _audio_filename(cls, media_type: str) -> str:
        """Return an upload file name whose extension matches *media_type*."""
        return "audio." + cls._AUDIO_EXTENSIONS.get(media_type, "mp3")

    @staticmethod
    def _run_python(source: bytes) -> str:
        """Execute a downloaded Python file and report its ``result`` variable.

        Returns a ``"Python result: ..."`` string, or a
        ``"[Python execution error: ...]"`` string if the script fails.
        """
        # SECURITY: this runs code downloaded from a remote service with the
        # full privileges of this process. Only use against a trusted
        # endpoint, or move execution into a sandbox.
        try:
            # A single namespace serves as both globals and locals so that
            # top-level functions defined by the script can call each other.
            namespace = {}
            exec(source.decode("utf-8"), namespace)
        except (Exception, SystemExit) as e:
            # SystemExit is not an Exception subclass; a script calling
            # sys.exit() must not terminate the agent.
            return f"[Python execution error: {e}]"
        result = namespace.get("result", "[Executed. Check code return value manually if needed.]")
        return f"Python result: {result}"

    @classmethod
    def _file_context(cls, file_bytes: bytes, media_type: str) -> str:
        """Build prompt context for a non-image, non-audio attachment.

        Returns an empty string for media types that are not handled.
        """
        # Python must be checked before the generic text branch because its
        # media type ("text/x-python") also contains "text".
        if "python" in media_type:
            return cls._run_python(file_bytes)
        if "text" in media_type or "csv" in media_type:
            # errors="replace" keeps a mis-encoded file from raising.
            text = file_bytes.decode("utf-8", errors="replace")
            return text[:cls.MAX_TEXT_CONTEXT_CHARS]
        if "pdf" in media_type:
            return "[PDF file detected. OCR not yet implemented.]"
        return ""

    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
        """Answer *question*, using the file attached to *task_id* if any.

        Returns:
            The model's answer with surrounding whitespace removed, or an
            ``"[Agent error: ...]"`` string if an OpenAI call fails.
        """
        image = None
        image_type = "image/png"
        audio = None
        audio_name = "audio.mp3"
        tool_context = ""

        if task_id:
            _url, file_bytes, file_type = self.fetch_file(task_id)
            if file_bytes is None:
                tool_context = file_type  # fetch_file put the error message here
            else:
                media_type = self._media_type(file_type)
                if "image" in media_type:
                    image = base64.b64encode(file_bytes).decode("utf-8")
                    if media_type.startswith("image/"):
                        # Advertise the real format instead of assuming PNG.
                        image_type = media_type
                elif "audio" in media_type:
                    audio = file_bytes
                    audio_name = self._audio_filename(media_type)
                else:
                    tool_context = self._file_context(file_bytes, media_type)

        messages = [
            {"role": "system", "content": self.instructions},
            {"role": "user", "content": f"{tool_context}\n\nQUESTION: {question}\nANSWER:"},
        ]

        # Top-level boundary: any API failure is reported as the answer text
        # so that a batch run over many questions keeps going.
        try:
            if image:
                response = self.client.chat.completions.create(
                    model="gpt-4o",
                    messages=[
                        {"role": "system", "content": self.instructions},
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": question},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{image_type};base64,{image}",
                                        "detail": "auto",
                                    },
                                },
                            ],
                        },
                    ],
                )
            else:
                if audio:
                    # The upload needs a file name with a recognisable
                    # extension; a bare BytesIO has none.
                    audio_file = io.BytesIO(audio)
                    audio_file.name = audio_name
                    transcript = self.client.audio.transcriptions.create(
                        model="whisper-1",
                        file=audio_file,
                        response_format="text",
                    )
                    messages.append({"role": "user", "content": f"Transcript: {transcript.strip()}"})
                response = self.client.chat.completions.create(
                    model="gpt-4-turbo",
                    messages=messages,
                    temperature=0.0,
                )
            # content can be None (e.g. a refusal); treat that as empty.
            answer = response.choices[0].message.content
            return (answer or "").strip()
        except Exception as e:
            return f"[Agent error: {e}]"