File size: 4,081 Bytes
332e48b 2693f75 9eb69da eb7cc40 9eb69da eb7cc40 332e48b 9eb69da 332e48b eb7cc40 9eb69da eb7cc40 9eb69da eb7cc40 9eb69da eb7cc40 9eb69da eb7cc40 9eb69da d48b3cc eb7cc40 9eb69da eb7cc40 9eb69da 332e48b d48b3cc 9eb69da d48b3cc 9eb69da d48b3cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import os
import io
import base64
import requests
import pandas as pd
from openai import OpenAI
class GaiaAgent:
    """Answer GAIA questions with OpenAI models, using an optional task file.

    When a ``task_id`` is supplied, the file attached to that task is
    downloaded from the scoring API and handled according to its content type:
    images go to a vision-capable chat model, audio is transcribed first,
    Python sources are executed, and text/CSV content is inlined (truncated)
    into the prompt.
    """

    # Filename extension to advertise for each audio media type when uploading
    # raw bytes for transcription.
    _AUDIO_EXTENSIONS = {
        "audio/mpeg": "mp3",
        "audio/mp3": "mp3",
        "audio/wav": "wav",
        "audio/x-wav": "wav",
        "audio/wave": "wav",
        "audio/mp4": "m4a",
        "audio/x-m4a": "m4a",
        "audio/ogg": "ogg",
        "audio/webm": "webm",
        "audio/flac": "flac",
    }

    # Maximum number of characters of a text/CSV file inlined into the prompt.
    _MAX_TEXT_CHARS = 2000

    def __init__(self):
        """Create the OpenAI client and set the system prompt and API URL."""
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.instructions = (
            "You are a multimodal GAIA assistant capable of understanding text, images, audio, and code. "
            "Use file context if provided, think step by step, and respond with the exact answer only."
        )
        self.api_url = "https://agents-course-unit4-scoring.hf.space"

    def fetch_file(self, task_id: str) -> "tuple[str | None, bytes | None, str]":
        """Download the file attached to ``task_id``.

        Returns:
            ``(url, content, content_type)`` on success, where ``content_type``
            is the response's ``Content-Type`` header (``""`` if absent).
            On any failure, ``(None, None, "[Fetch error: ...]")`` so that the
            caller can forward the message instead of handling an exception.
        """
        try:
            url = f"{self.api_url}/files/{task_id}"
            response = requests.get(url, timeout=15)
            response.raise_for_status()
            content_type = response.headers.get("Content-Type", "")
            return url, response.content, content_type
        except Exception as e:
            # Deliberately best-effort: every failure is reported as text.
            return None, None, f"[Fetch error: {e}]"

    @staticmethod
    def _media_type(content_type: str) -> str:
        """Return the bare, lower-cased media type of a Content-Type header."""
        # Drop parameters such as "; charset=utf-8" so suffix/substring checks
        # see only e.g. "text/x-python".
        return content_type.split(";", 1)[0].strip().lower()

    @staticmethod
    def _run_python(source: bytes) -> str:
        """Execute a downloaded Python file and describe its ``result`` value."""
        try:
            # SECURITY: this executes code downloaded from the scoring API with
            # the full privileges of this process. Only acceptable while that
            # endpoint is trusted; run it in a sandbox otherwise.
            #
            # A single namespace is used for globals and locals. With separate
            # dicts, top-level functions in the script cannot see each other
            # (or top-level variables) and fail with NameError.
            namespace = {}
            exec(source.decode("utf-8"), namespace)
            result = namespace.get(
                "result", "[Executed. Check code return value manually if needed.]"
            )
            return f"Python result: {result}"
        except Exception as e:
            return f"[Python execution error: {e}]"

    def _transcribe(self, audio: bytes, media_type: str) -> str:
        """Transcribe raw audio bytes and return the stripped transcript."""
        extension = self._AUDIO_EXTENSIONS.get(media_type, "mp3")
        # Upload as (filename, contents, media type): a bare BytesIO carries no
        # filename from which the audio format could be inferred.
        transcript = self.client.audio.transcriptions.create(
            model="whisper-1",
            file=(f"audio.{extension}", audio, media_type or "audio/mpeg"),
            response_format="text",
        )
        if not isinstance(transcript, str):
            transcript = getattr(transcript, "text", "") or ""
        return transcript.strip()

    def __call__(self, question: str, task_id: "str | None" = None) -> str:
        """Answer ``question``, using the file of ``task_id`` when given.

        Returns:
            The model's reply with surrounding whitespace removed, or
            ``"[Agent error: ...]"`` if an OpenAI request fails.
        """
        image = None
        image_type = "image/png"
        audio = None
        audio_type = ""
        tool_context = ""

        if task_id:
            _, file_bytes, file_type = self.fetch_file(task_id)
            if file_bytes is None:
                tool_context = file_type  # error message
            else:
                media_type = self._media_type(file_type)
                if "image" in media_type:
                    image = base64.b64encode(file_bytes).decode("utf-8")
                    if media_type.startswith("image/"):
                        image_type = media_type
                elif "audio" in media_type:
                    audio = file_bytes
                    audio_type = media_type
                elif media_type.endswith("python"):
                    tool_context = self._run_python(file_bytes)
                elif "text" in media_type or "csv" in media_type:
                    # Replace undecodable bytes rather than crash on files
                    # that are not valid UTF-8.
                    text = file_bytes.decode("utf-8", errors="replace")
                    tool_context = text[: self._MAX_TEXT_CHARS]
                elif "pdf" in media_type:
                    tool_context = "[PDF file detected. OCR not yet implemented.]"

        messages = [
            {"role": "system", "content": self.instructions},
            {"role": "user", "content": f"{tool_context}\n\nQUESTION: {question}\nANSWER:"},
        ]

        try:
            if image:
                response = self.client.chat.completions.create(
                    model="gpt-4o",
                    messages=[
                        {"role": "system", "content": self.instructions},
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": question},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{image_type};base64,{image}",
                                        "detail": "auto",
                                    },
                                },
                            ],
                        },
                    ],
                    # Same deterministic setting as the text and audio paths.
                    temperature=0.0,
                )
            else:
                if audio:
                    transcript = self._transcribe(audio, audio_type)
                    messages.append({"role": "user", "content": f"Transcript: {transcript}"})
                response = self.client.chat.completions.create(
                    model="gpt-4-turbo",
                    messages=messages,
                    temperature=0.0,
                )
            # The message content may be None; treat that as an empty answer.
            content = response.choices[0].message.content
            return (content or "").strip()
        except Exception as e:
            return f"[Agent error: {e}]"
|