File size: 4,036 Bytes
332e48b 8dcca97 5fffd11 6a05ca9 eb7cc40 332e48b 5fffd11 75e40db 8dcca97 6a05ca9 5fffd11 75e40db 8dcca97 6a05ca9 8dcca97 6a05ca9 8dcca97 6a05ca9 5fffd11 6a05ca9 5fffd11 392825a 6a05ca9 8dcca97 6a05ca9 5fffd11 8dcca97 6a05ca9 5fffd11 6a05ca9 8dcca97 6a05ca9 5fffd11 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import os
import requests
import re
import base64
from openai import OpenAI
class GaiaAgent:
    """Answer GAIA benchmark questions with OpenAI chat models.

    Calling an instance with a question (and optionally a task id) downloads
    the task's attachment from the scoring API, folds it into the prompt
    according to its content type, and returns the model's reply as text.
    Failures talking to the model are reported as bracketed error strings
    rather than raised.
    """

    # Group 1 is the video ID. Accepts http/https, optional "www."/"m.",
    # "youtu.be/" short links, and a "v=" that is not the first query parameter.
    _YOUTUBE_URL = re.compile(
        r"https?://(?:www\.|m\.)?"
        r"(?:youtube\.com/watch\?(?:[^\s#]*&)?v=|youtu\.be/)"
        r"([\w-]+)"
    )
    # Content-Type fragments treated as inline-able text.
    _TEXT_MARKERS = ("text", "csv", "json")
    # Upper bound on how many characters of a text attachment enter the prompt.
    _MAX_CONTEXT_CHARS = 3000

    def __init__(self):
        """Create the OpenAI client (key read from OPENAI_API_KEY) and prompts."""
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.api_url = "https://agents-course-unit4-scoring.hf.space"
        self.instructions = (
            "You are a highly skilled and concise research assistant solving GAIA benchmark questions.\n"
            "Analyze attached files, video links, and images. Reason step-by-step internally.\n"
            "Return only the final factual answer. Do not explain."
        )

    def fetch_file(self, task_id: str):
        """Download the attachment for *task_id*.

        Returns:
            ``(content_bytes, content_type)`` on success, or
            ``(None, "[File error: ...]")`` on any failure. Callers must check
            the first element for ``None`` before using the second as a MIME
            type, because on failure it carries an error message instead.
        """
        try:
            url = f"{self.api_url}/files/{task_id}"
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            content_type = response.headers.get("Content-Type", "")
            return response.content, content_type
        except Exception as e:
            # Deliberately best-effort: a missing attachment must not abort the run.
            return None, f"[File error: {e}]"

    def extract_youtube_context(self, question: str) -> str:
        """Return a prompt hint naming the YouTube video linked in *question*.

        Returns an empty string when the question contains no recognised
        YouTube URL.
        """
        match = self._YOUTUBE_URL.search(question)
        if match is None:
            return ""
        video_id = match.group(1)
        return (
            f"This question refers to a YouTube video with ID: {video_id}.\n"
            f"Assume the video contains relevant visual or auditory cues.\n"
        )

    def extract_image_prompt(self, image_bytes: bytes, mime_type: str = "image/png") -> dict:
        """Build a user message embedding *image_bytes* as a base64 data URL.

        Args:
            image_bytes: Raw image data.
            mime_type: Content type of the image; header parameters such as
                ``; charset=...`` are dropped. Anything that is not an
                ``image/*`` type falls back to ``image/png``.

        Returns:
            A chat message dict with a text part and an ``image_url`` part.
        """
        mime = (mime_type or "").split(";", 1)[0].strip().lower()
        if not mime.startswith("image/"):
            mime = "image/png"
        image_b64 = base64.b64encode(image_bytes).decode("utf-8")
        return {
            "role": "user",
            "content": [
                {"type": "text", "text": "Please analyze the image and answer the chess question accurately. Provide only the move in algebraic notation."},
                {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{image_b64}"}},
            ],
        }

    def _ask(self, model, messages, error_label, **options):
        """Send *messages* to *model* and return the stripped reply text.

        Any exception is converted into ``"[<error_label>: <exception>]"``.
        A reply whose content is ``None`` yields an empty string.
        """
        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                **options,
            )
            return (response.choices[0].message.content or "").strip()
        except Exception as e:
            return f"[{error_label}: {e}]"

    def __call__(self, question: str, task_id: str = None) -> str:
        """Answer *question*, using the attachment of *task_id* when available.

        Image attachments are answered immediately with ``gpt-4o``. Every
        other case (text/PDF/audio attachment, YouTube link, plain question)
        is answered with ``gpt-4-turbo`` at temperature 0.

        Returns:
            The model's answer, or a bracketed error string if the request
            to the model failed.
        """
        messages = [{"role": "system", "content": self.instructions}]

        if task_id:
            file_data, content_type = self.fetch_file(task_id)
            # When the download failed, content_type holds an error message;
            # never classify it as a MIME type or touch the (None) payload.
            if file_data is not None and isinstance(content_type, str):
                kind = content_type.lower()
                if "image" in kind:
                    messages.append(self.extract_image_prompt(file_data, content_type))
                    messages.append({"role": "user", "content": question})
                    return self._ask("gpt-4o", messages, "Image answer error")
                if any(marker in kind for marker in self._TEXT_MARKERS):
                    context = file_data.decode(errors="ignore")[:self._MAX_CONTEXT_CHARS]
                    messages.append({"role": "user", "content": f"File Content:\n{context}\n\nQuestion: {question}"})
                elif "pdf" in kind:
                    messages.append({"role": "user", "content": f"[PDF content detected]\n\nQuestion: {question}"})
                elif "audio" in kind:
                    messages.append({"role": "user", "content": f"[Audio content detected]\n\nQuestion: {question}"})

        video_context = self.extract_youtube_context(question)
        if video_context:
            messages.append({"role": "user", "content": f"{video_context}\n\nQuestion: {question}"})
        elif not any(m["role"] == "user" for m in messages):
            # Nothing else carried the question, so send it on its own.
            messages.append({"role": "user", "content": question})

        return self._ask("gpt-4-turbo", messages, "Answer error", temperature=0.0)
|