File size: 3,393 Bytes
332e48b 2693f75 eb7cc40 332e48b ffdfd85 332e48b eb7cc40 ffdfd85 5c63a78 ffdfd85 5c63a78 ffdfd85 5c63a78 ffdfd85 eb7cc40 2693f75 5c63a78 ffdfd85 5c63a78 ffdfd85 2693f75 5c63a78 ffdfd85 eb7cc40 2693f75 eb7cc40 ffdfd85 eb7cc40 ffdfd85 eb7cc40 ffdfd85 eb7cc40 ffdfd85 332e48b eb7cc40 332e48b eb7cc40 332e48b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import os
import io
import pandas as pd
import requests
from openai import OpenAI
class GaiaAgent:
    """Answer GAIA benchmark questions with an OpenAI chat model.

    When a ``task_id`` is supplied, the agent downloads the task's attached
    file from the scoring API, condenses it into a short textual summary and
    places that summary in the prompt ahead of the question.
    """

    # Question keywords that select the "smallest value" / "largest value"
    # CSV summaries.
    _MIN_KEYWORDS = ("lowest", "cheapest", "minimum")
    _MAX_KEYWORDS = ("highest", "most expensive", "maximum")
    # Column-name fragments tried when the question itself names no metric.
    _MIN_COLUMN_HINTS = ("price", "cost", "amount")
    _MAX_COLUMN_HINTS = ("price", "score", "rating")
    # Every metric fragment that is searched for in the question text.
    _METRIC_HINTS = ("price", "cost", "amount", "score", "rating")
    # Regex class of characters removed before a second numeric parse:
    # "$", euro sign, pound sign, comma (treated as a thousands separator)
    # and whitespace.
    _NUMERIC_NOISE = "[$\u20ac\u00a3,\\s]"

    def __init__(
        self,
        model: str = "gpt-4-turbo",
        api_url: str = "https://agents-course-unit4-scoring.hf.space",
    ):
        """Create the OpenAI client and store the prompt configuration.

        Args:
            model: Chat model name passed to the completions endpoint.
            api_url: Base URL of the service that serves task files under
                ``/files/<task_id>``.

        The API key is read from the ``OPENAI_API_KEY`` environment variable.
        """
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.instructions = (
            "You are a reasoning assistant solving GAIA benchmark questions. "
            "If data is provided, analyze it logically and extract the relevant facts. "
            "Think step by step. Output only the final answer."
        )
        self.api_url = api_url
        self.model = model

    def analyze_csv(self, csv_text: str, question: str) -> str:
        """Summarise CSV text with a heuristic chosen from the question.

        Args:
            csv_text: Raw CSV content.
            question: The benchmark question; matched case-insensitively.

        Returns:
            One of ``"Lowest <col>: {row}"``, ``"Highest <col>: {row}"``,
            ``"Total rows: <n>"``, ``"Sample row: {row}"``,
            ``"[CSV contains no data rows]"`` or, when anything raises,
            ``"[CSV parsing failed: <error>]"``. This method never raises.
        """
        try:
            df = pd.read_csv(io.StringIO(csv_text))
            question_lower = question.lower()

            # "lowest"-style keywords take precedence over "highest"-style
            # ones when a question contains both.
            min_pos = self._keyword_position(question_lower, self._MIN_KEYWORDS)
            max_pos = -1
            if min_pos == -1:
                max_pos = self._keyword_position(question_lower, self._MAX_KEYWORDS)

            if min_pos != -1 or max_pos != -1:
                wants_max = min_pos == -1
                if not df.empty:
                    col = self._pick_metric_column(
                        df,
                        question_lower,
                        max_pos if wants_max else min_pos,
                        self._MAX_COLUMN_HINTS if wants_max else self._MIN_COLUMN_HINTS,
                    )
                    if col is not None:
                        row = self._extreme_row(df, col, largest=wants_max)
                        label = "Highest" if wants_max else "Lowest"
                        return f"{label} {col}: {row}"
                # No usable column (or no rows): fall through to the fallback.
            elif "how many" in question_lower:
                return f"Total rows: {len(df)}"

            # Fallback: show the first row, unless there is none to show.
            if df.empty:
                return "[CSV contains no data rows]"
            sample = df.iloc[0].to_dict()
            return f"Sample row: {sample}"
        except Exception as e:
            # Best effort by design: the message is embedded in the prompt
            # instead of aborting the whole answer.
            return f"[CSV parsing failed: {e}]"

    @staticmethod
    def _keyword_position(text, keywords):
        """Return the earliest index in *text* of any keyword, or -1."""
        positions = [text.find(word) for word in keywords if word in text]
        return min(positions) if positions else -1

    def _pick_metric_column(self, df, question_lower, anchor, fallback_hints):
        """Choose the column to rank by.

        Metric words that appear in the question at or after *anchor* (the
        position of the lowest/highest keyword) are tried first, nearest
        first, so "highest rating" ranks a rating column even when a price
        column comes earlier in the file. If none of them matches a column,
        *fallback_hints* are used.
        """
        mentioned = []
        for hint in self._METRIC_HINTS:
            pos = question_lower.find(hint, anchor)
            if pos != -1:
                mentioned.append((pos, hint))
        for _, hint in sorted(mentioned):
            col = self._detect_column(df, [hint])
            if col is not None:
                return col
        return self._detect_column(df, fallback_hints)

    def _extreme_row(self, df, col, largest):
        """Return the row (as a dict) holding the min or max of *col*.

        Values are compared numerically where they can be parsed as numbers;
        cells that fail a plain parse are retried with currency symbols,
        commas and whitespace removed. A column with no parsable value is
        ranked with pandas' own ordering for its dtype.
        """
        series = df[col]
        numeric = pd.to_numeric(series, errors="coerce")
        if numeric.isna().any():
            cleaned = series.astype(str).str.replace(
                self._NUMERIC_NOISE, "", regex=True
            )
            numeric = numeric.fillna(pd.to_numeric(cleaned, errors="coerce"))

        if numeric.notna().any():
            # Work positionally so an unusual index cannot confuse the lookup.
            numeric = numeric.reset_index(drop=True)
            pos = numeric.idxmax() if largest else numeric.idxmin()
            return df.iloc[int(pos)].to_dict()

        return df.sort_values(by=col, ascending=not largest).iloc[0].to_dict()

    def _detect_column(self, df, candidates):
        """Return the first column whose lower-cased name contains a candidate.

        Columns are scanned in file order. Labels are passed through ``str``
        first so non-string labels do not raise. Returns ``None`` when
        nothing matches.
        """
        for col in df.columns:
            lowered = str(col).lower()
            if any(name in lowered for name in candidates):
                return col
        return None

    def fetch_file_context(self, task_id: str, question: str) -> str:
        """Download the file attached to *task_id* and describe it as text.

        Args:
            task_id: Identifier appended to ``<api_url>/files/``.
            question: Forwarded to :meth:`analyze_csv` for CSV payloads.

        Returns:
            A CSV summary, a preview of up to 1000 characters for JSON or
            plain text, a bracketed notice for PDFs and unsupported types,
            or ``"[Error fetching file: <error>]"`` if the request fails.
            This method never raises.
        """
        try:
            url = f"{self.api_url}/files/{task_id}"
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            content_type = response.headers.get("Content-Type", "")
            kind = content_type.lower()
            # The URL only ends in ".csv" if task_id does, so also honour a
            # filename advertised via Content-Disposition when one is sent.
            disposition = response.headers.get("Content-Disposition", "").lower()

            if "csv" in kind or url.endswith(".csv") or ".csv" in disposition:
                return self.analyze_csv(response.text, question)
            elif "json" in kind:
                return f"JSON Preview: {response.text[:1000]}"
            elif "text/plain" in kind:
                return f"Text Sample: {response.text[:1000]}"
            elif "pdf" in kind:
                return "[PDF detected. OCR not supported.]"
            else:
                return f"[Unsupported file type: {content_type}]"
        except Exception as e:
            # Best effort by design: report the failure inside the prompt.
            return f"[Error fetching file: {e}]"

    def __call__(self, question: str, task_id: str = None) -> str:
        """Answer *question*, optionally using the file attached to *task_id*.

        Args:
            question: The question text.
            task_id: When truthy, the task's file is fetched and its summary
                is placed in the prompt under ``FILE INSIGHTS``.

        Returns:
            The model's reply with surrounding whitespace removed; an empty
            string if the reply carries no text content.
        """
        file_fact = ""
        if task_id:
            file_fact = self.fetch_file_context(task_id, question)
            file_fact = f"FILE INSIGHTS:\n{file_fact}\n"

        # The instructions are sent both as the system message and at the
        # top of the user message; kept as-is so the prompt is unchanged.
        prompt = f"{self.instructions}\n\n{file_fact}QUESTION: {question}\nANSWER:"
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.instructions},
                {"role": "user", "content": prompt},
            ],
            temperature=0.0,
        )
        # content may be None (e.g. a refusal or tool-call reply).
        content = response.choices[0].message.content
        return (content or "").strip()
|