import io
import os
from typing import Optional

import pandas as pd
import requests
from openai import OpenAI


class GaiaAgent:
    """Answer GAIA benchmark questions with an OpenAI chat model.

    When a ``task_id`` is supplied, the file attached to that task is fetched
    from the scoring API and a short textual preview of it is placed in the
    prompt ahead of the question.
    """

    DEFAULT_MODEL = "gpt-4-turbo"
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    REQUEST_TIMEOUT = 10  # seconds, passed to requests.get
    PREVIEW_CHARS = 1000  # max characters of JSON / plain text put in the prompt
    MAX_SUMMARY_COLUMNS = 10  # max column names listed in a CSV summary

    def __init__(
        self,
        *,
        model: str = DEFAULT_MODEL,
        api_url: str = DEFAULT_API_URL,
    ) -> None:
        """Create the OpenAI client and store the prompt configuration.

        Args:
            model: Chat-completions model name used by ``__call__``.
            api_url: Base URL of the service that serves task files under
                ``/files/{task_id}``.

        The API key is read from the ``OPENAI_API_KEY`` environment variable.
        """
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.instructions = (
            "You are solving GAIA benchmark questions. "
            "If given a table, analyze it and extract relevant facts to answer accurately. "
            "Provide only the final answer. Do not explain."
        )
        self.model = model
        self.api_url = api_url

    def summarize_csv(self, csv_text: str) -> str:
        """Return a short description of CSV text.

        The summary contains the row and column counts, up to
        ``MAX_SUMMARY_COLUMNS`` column names, and the first data row as a
        dict.

        Args:
            csv_text: Raw CSV content.

        Returns:
            The summary string, or ``"[Failed to parse CSV: ...]"`` when
            pandas cannot parse the text. This method never raises for
            malformed input.
        """
        try:
            df = pd.read_csv(io.StringIO(csv_text))
        except Exception as e:  # best-effort: any parse failure becomes a message
            return f"[Failed to parse CSV: {e}]"

        # Column labels are not guaranteed to be strings (e.g. a numeric
        # header row), so convert explicitly before joining.
        column_names = ", ".join(
            str(name) for name in df.columns[: self.MAX_SUMMARY_COLUMNS]
        )
        summary = f"Rows: {len(df)}, Columns: {len(df.columns)}\n"
        summary += f"Columns: {column_names}\n"

        # A header-only CSV parses successfully but has no row 0 to sample.
        if len(df) > 0:
            sample_row = df.iloc[0].to_dict()
            summary += f"Sample row: {sample_row}"
        else:
            summary += "Sample row: (no data rows)"
        return summary

    def fetch_file_context(self, task_id: str) -> str:
        """Download the file for ``task_id`` and return a textual preview.

        Args:
            task_id: Identifier appended to ``{api_url}/files/``.

        Returns:
            A CSV summary, a truncated JSON or plain-text sample, or a
            bracketed placeholder for PDFs, unsupported types and fetch
            errors. This method never raises.
        """
        url = f"{self.api_url}/files/{task_id}"
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except Exception as e:  # best-effort: report the failure in the prompt
            return f"[File fetch error: {e}]"

        content_type = response.headers.get("Content-Type", "")
        # Media types are case-insensitive; match on a lowercased copy but
        # keep the original value for the "unsupported" message.
        kind = content_type.lower()
        # The request URL rarely carries an extension, so also look at the
        # filename the server advertises for the download, if any.
        disposition = response.headers.get("Content-Disposition", "").lower()

        if "csv" in kind or url.endswith(".csv") or ".csv" in disposition:
            return self.summarize_csv(response.text)
        if "json" in kind:
            return f"JSON Sample: {response.text[:self.PREVIEW_CHARS]}"
        if "text/plain" in kind:
            return f"Text Sample: {response.text[:self.PREVIEW_CHARS]}"
        if "pdf" in kind:
            return "[PDF detected. OCR not supported in this version.]"
        return f"[Unsupported file type: {content_type}]"

    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
        """Answer ``question``, optionally using the file attached to a task.

        Args:
            question: The question text.
            task_id: If truthy, the task's file preview is fetched and placed
                before the question in the prompt.

        Returns:
            The model's reply with surrounding whitespace removed; an empty
            string if the reply carries no text content.
        """
        context = ""
        if task_id:
            file_context = self.fetch_file_context(task_id)
            context = f"FILE DATA:\n{file_context}\n"

        # The instructions are sent both as the system message and at the top
        # of the user prompt.
        prompt = f"{self.instructions}\n\n{context}QUESTION: {question}\nANSWER:"

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.instructions},
                {"role": "user", "content": prompt},
            ],
            temperature=0.0,
        )
        # message.content is optional in the API response; guard against None
        # so .strip() cannot raise AttributeError.
        answer = response.choices[0].message.content
        return (answer or "").strip()