File size: 2,437 Bytes
332e48b
2693f75
 
eb7cc40
 
332e48b
 
 
 
 
2693f75
 
332e48b
eb7cc40
 
2693f75
eb7cc40
 
 
 
 
2693f75
 
 
 
 
 
 
 
 
 
 
eb7cc40
2693f75
 
 
 
 
eb7cc40
 
2693f75
eb7cc40
2693f75
eb7cc40
 
 
 
2693f75
eb7cc40
2693f75
eb7cc40
2693f75
332e48b
 
 
 
 
eb7cc40
332e48b
 
 
eb7cc40
332e48b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
import io
import pandas as pd
import requests
from openai import OpenAI

class GaiaAgent:
    """Answer GAIA benchmark questions with an OpenAI chat model.

    For a question that carries a ``task_id`` the agent first downloads the
    file attached to that task from the scoring API, condenses it into a
    short text description, and places that description ahead of the
    question in the prompt.

    The class attributes below are defaults; each can be overridden on an
    instance (``model``, ``api_url`` and ``request_timeout`` also through
    ``__init__`` keyword arguments).
    """

    # Chat model used for every completion request.
    model = "gpt-4-turbo"
    # Base URL of the scoring service that serves task attachments.
    api_url = "https://agents-course-unit4-scoring.hf.space"
    # Seconds to wait for the attachment download before giving up.
    request_timeout = 10
    # Longest JSON / plain-text excerpt (in characters) put into the prompt.
    max_preview_chars = 2000
    # A CSV is rendered in full only when it fits within these bounds;
    # larger tables are replaced by a shape + column-name summary.
    max_table_rows = 30
    max_table_cols = 15
    # Number of column names listed in the summary of a large CSV.
    max_listed_columns = 10

    def __init__(self, *, model=None, api_url=None, timeout=None):
        """Create the OpenAI client and store the system instructions.

        Args:
            model: Optional chat model name; defaults to the class-level
                ``model``.
            api_url: Optional base URL of the file service; defaults to the
                class-level ``api_url``.
            timeout: Optional download timeout in seconds; defaults to the
                class-level ``request_timeout``.
        """
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.instructions = (
            "You are an expert assistant solving GAIA benchmark questions. "
            "You analyze file contents (like CSV), reason step-by-step, and respond with a single factual answer."
        )
        self.api_url = api_url if api_url is not None else type(self).api_url
        if model is not None:
            self.model = model
        if timeout is not None:
            self.request_timeout = timeout

    @staticmethod
    def _filename_from_disposition(disposition: str) -> str:
        """Return the lowercased filename in a Content-Disposition value, or ""."""
        for part in disposition.split(";"):
            key, _, value = part.strip().partition("=")
            if key.strip().lower() in ("filename", "filename*"):
                return value.strip().strip('"').lower()
        return ""

    def _describe_csv(self, text: str) -> str:
        """Render a small CSV as a table, or summarise a large one."""
        df = pd.read_csv(io.StringIO(text))
        rows, cols = df.shape
        if cols <= self.max_table_cols and rows <= self.max_table_rows:
            try:
                table = df.to_markdown(index=False)
            except ImportError:
                # to_markdown needs the optional ``tabulate`` package; fall
                # back to plain text rather than losing the table entirely.
                table = df.to_string(index=False)
            return f"CSV table preview:\n{table}"
        listed = ", ".join(map(str, df.columns[: self.max_listed_columns]))
        return f"CSV summary: {rows} rows, {cols} columns.\nColumns: {listed}"

    def _describe_response(self, response, url: str) -> str:
        """Turn a successful download response into prompt-ready text."""
        content_type = response.headers.get("Content-Type", "")
        kind = content_type.lower()  # media types are case-insensitive
        # The request URL is ".../files/<task_id>", so it seldom carries a
        # file extension; when the server sends a Content-Disposition
        # filename, use that as an additional hint for CSV detection.
        filename = self._filename_from_disposition(
            response.headers.get("Content-Disposition", "")
        )

        if "text/csv" in kind or url.endswith(".csv") or filename.endswith(".csv"):
            return self._describe_csv(response.text)
        if "application/json" in kind:
            return f"JSON content:\n{response.text[:self.max_preview_chars]}"
        if "application/pdf" in kind:
            return "[PDF detected. You may need to request OCR summary.]"
        if "text/plain" in kind:
            return f"File preview:\n{response.text[:self.max_preview_chars]}"
        return f"[Unsupported file type: {content_type}]"

    def fetch_file_context(self, task_id: str) -> str:
        """Download the file attached to ``task_id`` and describe it as text.

        Args:
            task_id: Identifier appended to ``{api_url}/files/``.

        Returns:
            A textual description of the file (CSV table or summary, JSON or
            plain-text excerpt, or a bracketed placeholder for PDFs and
            unsupported types). Never raises: any download or parsing
            failure is returned as a bracketed error message instead.
        """
        try:
            url = f"{self.api_url}/files/{task_id}"
            response = requests.get(url, timeout=self.request_timeout)
            response.raise_for_status()
            return self._describe_response(response, url)
        except Exception as e:
            # Deliberately broad: file context is best-effort, and the
            # question is still sent to the model when the file is missing
            # or unreadable.
            return f"[Error downloading or processing file: {e}]"

    def __call__(self, question: str, task_id: "str | None" = None) -> str:
        """Answer ``question``, using the task's attached file when given.

        Args:
            question: The benchmark question text.
            task_id: Optional task identifier; when truthy, the attached
                file is fetched and described in the prompt.

        Returns:
            The model's reply with surrounding whitespace removed, or an
            empty string when the model returns no text content.
        """
        file_context = ""
        if task_id:
            described = self.fetch_file_context(task_id)
            if described:
                file_context = f"FILE CONTEXT:\n{described}\n"

        # The instructions are sent both as the system message and at the
        # top of the user message, exactly as before, so prompts are unchanged.
        prompt = f"{self.instructions}\n\n{file_context}QUESTION:\n{question}\nANSWER:"

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.instructions},
                {"role": "user", "content": prompt},
            ],
            temperature=0.0,
        )

        # ``content`` may be None (e.g. a filtered or empty reply); treat
        # that as an empty answer instead of failing on ``None.strip()``.
        content = response.choices[0].message.content
        return (content or "").strip()