File size: 3,393 Bytes
332e48b
2693f75
 
eb7cc40
 
332e48b
 
 
 
 
ffdfd85
 
 
332e48b
eb7cc40
 
ffdfd85
5c63a78
 
ffdfd85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c63a78
ffdfd85
 
 
 
 
 
 
 
5c63a78
ffdfd85
eb7cc40
 
 
 
 
2693f75
5c63a78
ffdfd85
5c63a78
ffdfd85
2693f75
5c63a78
 
ffdfd85
eb7cc40
 
2693f75
eb7cc40
ffdfd85
eb7cc40
 
ffdfd85
eb7cc40
ffdfd85
 
eb7cc40
ffdfd85
332e48b
 
 
 
 
eb7cc40
332e48b
 
 
eb7cc40
332e48b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import io
import pandas as pd
import requests
from openai import OpenAI

class GaiaAgent:
    """Answer GAIA benchmark questions with an OpenAI chat model.

    When a task id is supplied, the agent downloads the file attached to that
    task from the scoring API, condenses it into a short textual "insight"
    and prepends that insight to the prompt sent to the model.
    """

    # Maximum number of characters of a JSON / plain-text file shown to the model.
    PREVIEW_CHARS = 1000

    def __init__(
        self,
        model: str = "gpt-4-turbo",
        api_url: str = "https://agents-course-unit4-scoring.hf.space",
    ):
        """Create the OpenAI client and store the prompt configuration.

        Args:
            model: Chat-completion model name used by ``__call__``.
            api_url: Base URL of the scoring API that serves task files.
        """
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.instructions = (
            "You are a reasoning assistant solving GAIA benchmark questions. "
            "If data is provided, analyze it logically and extract the relevant facts. "
            "Think step by step. Output only the final answer."
        )
        self.api_url = api_url.rstrip("/")
        self.model = model

    def analyze_csv(self, csv_text: str, question: str) -> str:
        """Summarise a CSV document with respect to ``question``.

        Keyword heuristics on the question choose the summary:

        * "lowest" / "cheapest" / "minimum": the row with the smallest value
          in the first price/cost/amount-like column;
        * "highest" / "most expensive" / "maximum": the row with the largest
          value in the first price/score/rating-like column;
        * "how many": the number of data rows;
        * otherwise (or when no usable column is found): the first row.

        Args:
            csv_text: Raw CSV content.
            question: The natural-language question being answered.

        Returns:
            A one-line textual summary. This method never raises; any failure
            is reported as a bracketed message in the returned string.
        """
        # Best-effort boundary: whatever goes wrong here must not abort the
        # question, so every failure is turned into a message for the prompt.
        try:
            df = pd.read_csv(io.StringIO(csv_text))
            question_lower = question.lower()

            if any(k in question_lower for k in ("lowest", "cheapest", "minimum")):
                found = self._extreme_row(
                    df, ["price", "cost", "amount"], largest=False
                )
                if found is not None:
                    col, row = found
                    return f"Lowest {col}: {row}"
            elif any(
                k in question_lower
                for k in ("highest", "most expensive", "maximum")
            ):
                found = self._extreme_row(
                    df, ["price", "score", "rating"], largest=True
                )
                if found is not None:
                    col, row = found
                    return f"Highest {col}: {row}"
            elif "how many" in question_lower:
                return f"Total rows: {len(df)}"

            # Fallback: show the first row, unless there is none to show.
            if df.empty:
                return f"[CSV has no data rows; columns: {list(df.columns)}]"
            sample = df.iloc[0].to_dict()
            return f"Sample row: {sample}"

        except Exception as e:
            return f"[CSV parsing failed: {e}]"

    def _extreme_row(self, df, candidates, largest):
        """Return ``(column, row_dict)`` for the min/max row, or ``None``.

        ``None`` is returned when no column matches ``candidates`` or when the
        matching column holds no value that can be read as a number.
        """
        col = self._detect_column(df, candidates)
        if col is None:
            return None
        # Work with positions (not index labels) so the lookup below is
        # unambiguous even if the frame's index is not a plain 0..n-1 range.
        values = self._to_numeric(df[col]).reset_index(drop=True).dropna()
        if values.empty:
            return None
        position = values.idxmax() if largest else values.idxmin()
        return col, df.iloc[position].to_dict()

    @staticmethod
    def _to_numeric(series):
        """Convert a column to numbers; unparseable cells become NaN.

        Text cells are stripped of everything except digits, ``.`` and ``-``
        first, so values such as ``"$1,200.50"`` compare numerically instead
        of lexicographically.
        """
        if pd.api.types.is_numeric_dtype(series):
            return series
        cleaned = series.astype(str).str.replace(r"[^0-9.\-]", "", regex=True)
        return pd.to_numeric(cleaned, errors="coerce")

    def _detect_column(self, df, candidates):
        """Return the first column whose name contains one of ``candidates``.

        Columns are scanned in DataFrame order and matching is
        case-insensitive. Returns ``None`` when nothing matches.
        """
        for col in df.columns:
            # Column labels are not guaranteed to be strings.
            lowered = str(col).lower()
            if any(name in lowered for name in candidates):
                return col
        return None

    def fetch_file_context(self, task_id: str, question: str) -> str:
        """Download the file attached to ``task_id`` and describe it as text.

        Args:
            task_id: Identifier used to build the ``/files/{task_id}`` URL.
            question: Forwarded to ``analyze_csv`` for CSV files.

        Returns:
            A CSV summary, a truncated JSON / plain-text preview, or a
            bracketed note for unsupported types and download errors.
        """
        url = f"{self.api_url}/files/{task_id}"
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            return f"[Error fetching file: {e}]"

        # Header values are compared case-insensitively.
        content_type = response.headers.get("Content-Type", "").lower()
        # The URL carries no file extension, so the attachment filename (when
        # the server sends one) is the only place a ".csv" suffix can appear.
        disposition = response.headers.get("Content-Disposition", "").lower()

        if "csv" in content_type or ".csv" in disposition:
            return self.analyze_csv(response.text, question)
        if "json" in content_type:
            return f"JSON Preview: {response.text[:self.PREVIEW_CHARS]}"
        if "text/plain" in content_type:
            return f"Text Sample: {response.text[:self.PREVIEW_CHARS]}"
        if "pdf" in content_type:
            return "[PDF detected. OCR not supported.]"
        return f"[Unsupported file type: {content_type}]"

    def __call__(self, question: str, task_id: str = None) -> str:
        """Answer ``question``, optionally using the file attached to a task.

        Args:
            question: The question text.
            task_id: Optional task identifier; when truthy, the task's file is
                fetched and summarised into the prompt.

        Returns:
            The model's reply with surrounding whitespace removed, or an empty
            string when the reply carries no text content.
        """
        file_fact = ""
        if task_id:
            file_fact = self.fetch_file_context(task_id, question)
            file_fact = f"FILE INSIGHTS:\n{file_fact}\n"

        # NOTE: the instructions are sent both as the system message and at
        # the top of the user prompt; kept as-is to leave model input unchanged.
        prompt = f"{self.instructions}\n\n{file_fact}QUESTION: {question}\nANSWER:"

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.instructions},
                {"role": "user", "content": prompt},
            ],
            temperature=0.0,
        )

        # ``content`` may be None (e.g. refusals / tool calls); avoid .strip() on None.
        content = response.choices[0].message.content
        return (content or "").strip()