File size: 2,502 Bytes
332e48b
2693f75
 
eb7cc40
 
332e48b
 
 
 
 
5c63a78
 
 
332e48b
eb7cc40
 
5c63a78
 
 
 
 
 
 
 
 
 
 
2693f75
eb7cc40
 
 
 
 
2693f75
5c63a78
 
 
 
2693f75
5c63a78
 
 
eb7cc40
 
2693f75
eb7cc40
5c63a78
eb7cc40
 
5c63a78
eb7cc40
5c63a78
 
eb7cc40
5c63a78
332e48b
 
 
 
 
eb7cc40
332e48b
 
 
eb7cc40
332e48b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import io
import pandas as pd
import requests
from openai import OpenAI

class GaiaAgent:
    """Answer GAIA benchmark questions with an OpenAI chat model.

    When a task id is supplied, the file attached to that task is fetched
    from the scoring API and a short textual digest of it is placed in the
    prompt ahead of the question.
    """

    def __init__(self):
        """Create the OpenAI client and set the prompt and API endpoint."""
        # The key is read from the environment; os.getenv returns None when
        # OPENAI_API_KEY is unset and that value is passed straight through.
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.instructions = (
            "You are solving GAIA benchmark questions. "
            "If given a table, analyze it and extract relevant facts to answer accurately. "
            "Provide only the final answer. Do not explain."
        )
        self.api_url = "https://agents-course-unit4-scoring.hf.space"

    def summarize_csv(self, csv_text: str) -> str:
        """Return a short digest of CSV text for inclusion in a prompt.

        The digest lists the row and column counts, up to the first ten
        column names, and the first data row rendered as a dict.

        Args:
            csv_text: Raw CSV content.

        Returns:
            The digest, or a bracketed ``[Failed to parse CSV: ...]`` message
            when pandas cannot parse the text. This method never raises.
        """
        # Best-effort by design: any parsing problem is reported in-band so
        # the caller can still ask the model the question.
        try:
            df = pd.read_csv(io.StringIO(csv_text))
            summary = f"Rows: {len(df)}, Columns: {len(df.columns)}\n"
            # Column labels are stringified so non-string labels cannot make
            # the join fail.
            summary += f"Columns: {', '.join(map(str, df.columns[:10]))}\n"
            if len(df) == 0:
                # A header-only CSV is valid; report it instead of letting
                # df.iloc[0] raise and masking the column information.
                summary += "Sample row: [no data rows]"
            else:
                sample_row = df.iloc[0].to_dict()
                summary += f"Sample row: {sample_row}"
            return summary
        except Exception as e:
            return f"[Failed to parse CSV: {e}]"

    def fetch_file_context(self, task_id: str) -> str:
        """Download the file for ``task_id`` and describe it as text.

        Args:
            task_id: Identifier appended to ``{api_url}/files/``.

        Returns:
            A CSV digest, a sample of at most 1000 characters for JSON or
            plain text, a bracketed placeholder for PDFs and unsupported
            types, or a bracketed ``[File fetch error: ...]`` message when the
            request fails. This method never raises.
        """
        try:
            url = f"{self.api_url}/files/{task_id}"
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            content_type = response.headers.get("Content-Type", "")
            # Media types are case-insensitive, so match on a lowered copy
            # while still reporting the header exactly as received.
            kind = content_type.lower()
            # The URL ends with the task id, so the attachment filename in
            # Content-Disposition is also consulted to recognise CSV files.
            disposition = response.headers.get("Content-Disposition", "").lower()

            if "csv" in kind or url.endswith(".csv") or ".csv" in disposition:
                return self.summarize_csv(response.text)
            elif "json" in kind:
                return f"JSON Sample: {response.text[:1000]}"
            elif "text/plain" in kind:
                return f"Text Sample: {response.text[:1000]}"
            elif "pdf" in kind:
                return "[PDF detected. OCR not supported in this version.]"
            else:
                return f"[Unsupported file type: {content_type}]"

        except Exception as e:
            # Best-effort: a missing or unreachable file must not prevent the
            # question from being answered, so the error is returned as text.
            return f"[File fetch error: {e}]"

    def __call__(self, question: str, task_id: "str | None" = None) -> str:
        """Answer ``question``, optionally using the file attached to a task.

        Args:
            question: The question text.
            task_id: When truthy, the file for this task is fetched and its
                digest is included in the prompt under ``FILE DATA``.

        Returns:
            The model's reply with surrounding whitespace removed, or an
            empty string when the reply carries no text content.
        """
        context = ""
        if task_id:
            context = self.fetch_file_context(task_id)
            context = f"FILE DATA:\n{context}\n"

        prompt = f"{self.instructions}\n\n{context}QUESTION: {question}\nANSWER:"

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": self.instructions},
                {"role": "user", "content": prompt},
            ],
            temperature=0.0,
        )

        # message.content may be None (e.g. a refusal or filtered output);
        # fall back to "" so .strip() cannot raise AttributeError.
        answer = response.choices[0].message.content
        return (answer or "").strip()