File size: 4,130 Bytes
332e48b
5fffd11
1bf6d60
6a05ca9
6acc56a
08aa3fd
eb7cc40
332e48b
 
 
 
5fffd11
8dcca97
08aa3fd
1bf6d60
08aa3fd
 
6acc56a
08aa3fd
 
 
6acc56a
08aa3fd
6a05ca9
0e46560
ddbce07
0e46560
ddbce07
eab1747
 
ddbce07
 
08aa3fd
1bf6d60
8dcca97
eab1747
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e46560
6a05ca9
ddbce07
 
 
 
6a05ca9
ddbce07
08aa3fd
1bf6d60
 
 
 
 
 
 
 
 
0e46560
1bf6d60
 
eab1747
 
 
 
 
 
08aa3fd
eab1747
ddbce07
eab1747
 
ddbce07
eab1747
 
ddbce07
eab1747
 
 
 
 
 
 
 
 
 
 
 
 
0e46560
eab1747
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import os
import re
import requests
import base64
import io
import pandas as pd
from openai import OpenAI

class GaiaAgent:
    """Question-answering agent backed by the OpenAI API.

    Calling an instance with a question (and optionally a task id) fetches
    the file attached to that task from the scoring service, routes it to a
    handler chosen from its Content-Type (image, audio, spreadsheet, text),
    and otherwise falls back to a plain chat completion.
    """

    # Upper bound on the number of characters of file text or transcript
    # that is placed into a prompt.
    MAX_CONTEXT_CHARS = 3000

    def __init__(self):
        """Create the OpenAI client (key read from ``OPENAI_API_KEY``)."""
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.api_url = "https://agents-course-unit4-scoring.hf.space"

    def clean(self, text):
        """Normalize a model reply into a bare answer string.

        Removes the ``Final Answer:`` marker, newlines, surrounding
        whitespace and trailing periods. Periods inside the answer are
        kept so decimals ("3.14") and abbreviations survive intact.

        Args:
            text: Raw model output; ``None`` or empty yields ``""``.

        Returns:
            The cleaned answer.
        """
        if not text:
            # The chat API may return no content (e.g. on a refusal).
            return ""
        cleaned = text.replace("Final Answer:", "").replace("\n", "").strip()
        # Only sentence-ending punctuation is dropped, never interior dots.
        return cleaned.rstrip(".").strip()

    def fetch_file(self, task_id):
        """Download the file attached to ``task_id``.

        Returns:
            ``(content_bytes, content_type)`` on success, or
            ``(None, "[Fetch error: ...]")`` when the request fails.
        """
        try:
            r = requests.get(f"{self.api_url}/files/{task_id}", timeout=10)
            r.raise_for_status()
            return r.content, r.headers.get("Content-Type", "")
        except Exception as e:
            # Best-effort: a missing attachment must not abort the question.
            return None, f"[Fetch error: {e}]"

    def ask(self, prompt: str, model="gpt-4-turbo") -> str:
        """Send ``prompt`` to a chat model and return the cleaned answer.

        Args:
            prompt: Question text, optionally preceded by context.
            model: Chat model name.

        Returns:
            The reply after :meth:`clean`.
        """
        res = self.client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a precise assistant. Think step by step and return only the final answer in the correct format."},
                {"role": "user", "content": prompt + "\n\nFinal Answer:"},
            ],
            temperature=0.0,
        )
        return self.clean(res.choices[0].message.content)

    def ask_image(self, image_bytes: bytes, question: str, mime_type: str = "image/png") -> str:
        """Ask a vision model ``question`` about an image.

        Args:
            image_bytes: Raw image data.
            question: Question about the image.
            mime_type: MIME type used in the data URL; anything that is not
                an ``image/*`` type falls back to ``image/png``.

        Returns:
            The reply after :meth:`clean`.
        """
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"
        b64 = base64.b64encode(image_bytes).decode()
        messages = [
            {"role": "system", "content": "You are a visual assistant. Only return the final answer to the question."},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question},
                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{b64}"}},
                ],
            },
        ]
        res = self.client.chat.completions.create(model="gpt-4o", messages=messages)
        return self.clean(res.choices[0].message.content)

    def q_excel_sales(self, file: bytes, question: str) -> str:
        """Sum the ``sales`` column over rows whose ``category`` is "food".

        ``question`` is accepted for a uniform handler signature but is not
        used by the computation.

        Returns:
            The total formatted as ``$<amount>`` with two decimals, or
            ``[Excel error: ...]`` if the workbook cannot be processed.
        """
        try:
            df = pd.read_excel(io.BytesIO(file), engine="openpyxl")
            food = df[df['category'].str.lower() == 'food']
            total = food['sales'].sum()
            return f"${total:.2f}"
        except Exception as e:
            return f"[Excel error: {e}]"

    def q_audio_transcribe(self, file: bytes, question: str) -> str:
        """Transcribe an audio file and answer ``question`` from the text.

        The bytes are uploaded directly as a ``(filename, content)`` pair, so
        no temporary file is written and no file handle is left open.
        """
        transcript = self.client.audio.transcriptions.create(
            model="whisper-1",
            # The filename lets the API infer the audio container format.
            file=("audio.mp3", file),
        )
        content = transcript.text[: self.MAX_CONTEXT_CHARS]
        prompt = f"Transcript: {content}\n\nQuestion: {question}"
        return self.ask(prompt)

    def extract_youtube_hint(self, question: str) -> str:
        """Return a context sentence naming the YouTube video in ``question``.

        Accepts ``http``/``https`` URLs with or without the ``www.`` prefix.

        Returns:
            The hint sentence, or ``""`` when no watch URL is present.
        """
        match = re.search(r"https?://(?:www\.)?youtube\.com/watch\?v=([\w-]+)", question)
        if match:
            return f"This task is about a YouTube video (ID: {match.group(1)}). Assume the video visually or audibly answers the question."
        return ""

    def __call__(self, question: str, task_id: str = None) -> str:
        """Answer ``question``, using the file attached to ``task_id`` if any.

        Args:
            question: The question text.
            task_id: Optional task identifier used to fetch an attachment.

        Returns:
            The answer string produced by the matching handler.
        """
        context = ""

        if "youtube.com/watch" in question:
            hint = self.extract_youtube_hint(question)
            if hint:
                context += hint + "\n"

        if task_id:
            file, content_type = self.fetch_file(task_id)

            # On fetch failure `file` is None and `content_type` holds the
            # error text; an empty body is treated as "no attachment".
            if isinstance(file, bytes) and file and content_type:
                # Drop parameters such as "; charset=utf-8" and normalize
                # case so the checks below see only the bare media type.
                mime = content_type.split(";", 1)[0].strip().lower()
                if "image" in mime:
                    return self.ask_image(file, question, mime_type=mime)
                if "audio" in mime or task_id.endswith(".mp3"):
                    return self.q_audio_transcribe(file, question)
                if "spreadsheet" in mime or mime.endswith(("excel", "xlsx")):
                    return self.q_excel_sales(file, question)
                if "text" in mime:
                    # errors="ignore" means decoding cannot raise.
                    text = file.decode("utf-8", errors="ignore")[: self.MAX_CONTEXT_CHARS]
                    context += f"File Content:\n{text}\n"

        prompt = f"{context}\nQuestion: {question}"
        return self.ask(prompt)