Update agent.py
Browse files
agent.py
CHANGED
@@ -8,11 +8,23 @@ class GaiaAgent:
|
|
8 |
def __init__(self):
|
9 |
self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
10 |
self.instructions = (
|
11 |
-
"You are
|
12 |
-
"
|
|
|
13 |
)
|
14 |
self.api_url = "https://agents-course-unit4-scoring.hf.space"
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
def fetch_file_context(self, task_id: str) -> str:
|
17 |
try:
|
18 |
url = f"{self.api_url}/files/{task_id}"
|
@@ -20,36 +32,27 @@ class GaiaAgent:
|
|
20 |
response.raise_for_status()
|
21 |
content_type = response.headers.get("Content-Type", "")
|
22 |
|
23 |
-
if "
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
else:
|
28 |
-
return f"CSV summary: {df.shape[0]} rows, {df.shape[1]} columns.\nColumns: {', '.join(df.columns[:10])}"
|
29 |
-
|
30 |
-
elif "application/json" in content_type:
|
31 |
-
return f"JSON content:\n{response.text[:2000]}"
|
32 |
-
|
33 |
-
elif "application/pdf" in content_type:
|
34 |
-
return "[PDF detected. You may need to request OCR summary.]"
|
35 |
-
|
36 |
elif "text/plain" in content_type:
|
37 |
-
return f"
|
38 |
-
|
|
|
39 |
else:
|
40 |
return f"[Unsupported file type: {content_type}]"
|
41 |
|
42 |
except Exception as e:
|
43 |
-
return f"[
|
44 |
|
45 |
def __call__(self, question: str, task_id: str = None) -> str:
|
46 |
-
|
47 |
if task_id:
|
48 |
-
|
49 |
-
|
50 |
-
file_context = f"FILE CONTEXT:\n{file_context}\n"
|
51 |
|
52 |
-
prompt = f"{self.instructions}\n\n{
|
53 |
|
54 |
response = self.client.chat.completions.create(
|
55 |
model="gpt-4-turbo",
|
|
|
8 |
def __init__(self):
    """Set up the OpenAI client, the fixed instruction text, and the API base URL.

    The API key is read from the ``OPENAI_API_KEY`` environment variable;
    ``os.getenv`` yields ``None`` when it is unset and that value is passed
    straight to the client constructor.
    """
    openai_key = os.getenv("OPENAI_API_KEY")
    self.client = OpenAI(api_key=openai_key)

    # The instruction text is one string built from three sentences, each
    # ending in a single space so they concatenate cleanly.
    instruction_parts = [
        "You are solving GAIA benchmark questions. ",
        "If given a table, analyze it and extract relevant facts to answer accurately. ",
        "Provide only the final answer. Do not explain.",
    ]
    self.instructions = "".join(instruction_parts)

    # Base URL that fetch_file_context extends with /files/<task_id>.
    self.api_url = "https://agents-course-unit4-scoring.hf.space"
|
16 |
|
17 |
+
def summarize_csv(self, csv_text: str) -> str:
    """Return a short plain-text summary of CSV content.

    The summary lists the row and column counts, up to the first ten
    column names, and the first data row rendered as a dict.

    Args:
        csv_text: The raw CSV document as a string.

    Returns:
        The multi-line summary. When the text cannot be parsed, a
        bracketed ``[Failed to parse CSV: ...]`` note is returned instead
        of raising, so callers always get a string.
    """
    try:
        df = pd.read_csv(io.StringIO(csv_text))
        summary = f"Rows: {len(df)}, Columns: {len(df.columns)}\n"
        # Stringify labels so non-string column names cannot break join().
        summary += f"Columns: {', '.join(map(str, df.columns[:10]))}\n"
        if df.empty:
            # A header-only CSV is valid; df.iloc[0] would raise IndexError
            # here and be misreported as a parse failure.
            summary += "Sample row: [no data rows]"
        else:
            sample_row = df.iloc[0].to_dict()
            summary += f"Sample row: {sample_row}"
        return summary
    except Exception as e:
        # Deliberately broad: this is a best-effort summary and any parser
        # error should degrade to a note rather than crash the caller.
        return f"[Failed to parse CSV: {e}]"
|
27 |
+
|
28 |
def fetch_file_context(self, task_id: str) -> str:
|
29 |
try:
|
30 |
url = f"{self.api_url}/files/{task_id}"
|
|
|
32 |
response.raise_for_status()
|
33 |
content_type = response.headers.get("Content-Type", "")
|
34 |
|
35 |
+
if "csv" in content_type or url.endswith(".csv"):
|
36 |
+
return self.summarize_csv(response.text)
|
37 |
+
elif "json" in content_type:
|
38 |
+
return f"JSON Sample: {response.text[:1000]}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
elif "text/plain" in content_type:
|
40 |
+
return f"Text Sample: {response.text[:1000]}"
|
41 |
+
elif "pdf" in content_type:
|
42 |
+
return "[PDF detected. OCR not supported in this version.]"
|
43 |
else:
|
44 |
return f"[Unsupported file type: {content_type}]"
|
45 |
|
46 |
except Exception as e:
|
47 |
+
return f"[File fetch error: {e}]"
|
48 |
|
49 |
def __call__(self, question: str, task_id: str = None) -> str:
|
50 |
+
context = ""
|
51 |
if task_id:
|
52 |
+
context = self.fetch_file_context(task_id)
|
53 |
+
context = f"FILE DATA:\n{context}\n"
|
|
|
54 |
|
55 |
+
prompt = f"{self.instructions}\n\n{context}QUESTION: {question}\nANSWER:"
|
56 |
|
57 |
response = self.client.chat.completions.create(
|
58 |
model="gpt-4-turbo",
|