Final_Assignment_Template

Sleeping

App Files Files Community

dawid-lorek committed 9 days ago

Commit

9eb69da

verified ·

1 Parent(s): d48b3cc

Update agent.py

Browse files

Files changed (1) hide show

agent.py +74 -52

agent.py CHANGED Viewed

@@ -1,79 +1,101 @@
 import os
 import io
-import pandas as pd
 import requests
 from openai import OpenAI
# Task IDs that require video, image, or audio input.
SKIPPED_TASKS = {
    # YouTube birds
    "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
    # Chess image
    "cca530fc-4052-43b2-b130-b30968d8aa44",
    # Teal'c audio
    "9d191bce-651d-4746-be2d-7ef8ecadb9c2",
    # Strawberry pie.mp3
    "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
    # Homework.mp3
    "1f975693-876d-457b-a649-393859e79bf3",
}
 class GaiaAgent:
     """Answer GAIA benchmark questions with an OpenAI chat model.

     When a task ID is given, the file served for that task by the scoring
     API is fetched and a short text description of it is put in the prompt.
     """

     def __init__(self):
         """Create the OpenAI client and store the prompt and API base URL."""
         self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
         self.instructions = (
             "You are a precise and logical assistant solving GAIA benchmark questions. "
             "Use any context or data provided. Respond with only the final answer."
         )
         self.api_url = "https://agents-course-unit4-scoring.hf.space"

     def analyze_csv(self, csv_text: str, question: str) -> str:
         """Turn CSV text into a one-line fact for the prompt.

         If the question mentions "total", "food" and "not including drinks",
         the ``sales`` column is summed over rows whose ``category`` equals
         "food" (case-insensitive). Otherwise the first row is returned as a
         dict. Any failure is returned as a bracketed message, not raised.
         """
         try:
             df = pd.read_csv(io.StringIO(csv_text))
             q = question.lower()
             if "total" in q and "food" in q and "not including drinks" in q:
                 food_items = df[df["category"].str.lower() == "food"]
                 # Bind the total first: reusing double quotes inside an
                 # f-string replacement field is a SyntaxError before 3.12.
                 total = food_items["sales"].sum()
                 return f"Total food sales: ${total:.2f}"
             return f"Sample row: {df.iloc[0].to_dict()}"
         except Exception as e:
             # Deliberate best effort: the message is passed on as context.
             return f"[CSV parse failed: {e}]"

     def fetch_file_context(self, task_id: str, question: str) -> str:
         """Download the file for ``task_id`` and describe it as text.

         The description depends on the response's Content-Type: CSV is
         handed to ``analyze_csv``, JSON and plain text are previewed (first
         1000 characters), and PDFs and other types yield a bracketed note.
         Request or HTTP errors are returned as ``"[File error: ...]"``.
         """
         try:
             url = f"{self.api_url}/files/{task_id}"
             response = requests.get(url, timeout=10)
             response.raise_for_status()
             content_type = response.headers.get("Content-Type", "")
             if "csv" in content_type or url.endswith(".csv"):
                 return self.analyze_csv(response.text, question)
             if "json" in content_type:
                 return f"JSON Preview: {response.text[:1000]}"
             if "text/plain" in content_type:
                 return f"Text Preview: {response.text[:1000]}"
             if "pdf" in content_type:
                 return "[PDF detected. OCR not supported.]"
             return f"[Unsupported file type: {content_type}]"
         except Exception as e:
             return f"[File error: {e}]"

     def __call__(self, question: str, task_id: "str | None" = None) -> str:
         """Answer ``question``, using the task's file as context if any.

         Returns ``"SKIPPED"`` for task IDs listed in ``SKIPPED_TASKS``, the
         stripped model reply on success, or ``"[Agent error: ...]"`` when
         the chat completion call raises.
         """
         if task_id in SKIPPED_TASKS:
             return "SKIPPED"

         file_fact = ""
         if task_id:
             file_fact = self.fetch_file_context(task_id, question)
             file_fact = f"FILE CONTEXT:\n{file_fact}\n"

         prompt = f"{self.instructions}\n\n{file_fact}QUESTION: {question}\nANSWER:"

         try:
             response = self.client.chat.completions.create(
                 model="gpt-4-turbo",
                 messages=[
                     {"role": "system", "content": self.instructions},
                     {"role": "user", "content": prompt},
                 ],
                 temperature=0.0,
             )
             return response.choices[0].message.content.strip()
         except Exception as e:
             return f"[Agent error: {e}]"

 import os
 import io
+import base64
 import requests
+import pandas as pd
 from openai import OpenAI
 class GaiaAgent:
     """Multimodal GAIA agent built on OpenAI chat and transcription models.

     Calling the agent with a question and a task ID downloads the file the
     scoring API serves for that task, converts it into model input based on
     its Content-Type, and returns the model's answer as a string.
     """

     def __init__(self):
         """Create the OpenAI client and store the prompt and API base URL."""
         self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
         self.instructions = (
             "You are a multimodal GAIA assistant capable of understanding text, images, audio, and code. "
             "Use file context if provided, think step by step, and respond with the exact answer only."
         )
         self.api_url = "https://agents-course-unit4-scoring.hf.space"

     def fetch_file(self, task_id: str) -> "tuple[str | None, bytes | None, str]":
         """Download the file attached to ``task_id``.

         Returns ``(url, content, content_type)`` on success. On any failure
         returns ``(None, None, message)`` where ``message`` is a bracketed
         error string, so callers must test the bytes for ``None``.
         """
         try:
             url = f"{self.api_url}/files/{task_id}"
             response = requests.get(url, timeout=15)
             response.raise_for_status()
             content_type = response.headers.get("Content-Type", "")
             return url, response.content, content_type
         except Exception as e:
             # Deliberate best effort: the message is forwarded as context.
             return None, None, f"[Fetch error: {e}]"

     @staticmethod
     def _run_python(source: bytes) -> str:
         """Execute downloaded Python source and report its ``result`` variable."""
         try:
             # SECURITY: this runs code fetched over the network with the
             # agent's own privileges. Only use against a trusted endpoint,
             # or move execution into a sandbox.
             namespace = {}
             # One dict serves as globals and locals. With separate dicts,
             # functions defined by the script cannot see each other.
             exec(source.decode("utf-8"), namespace)
             result = namespace.get(
                 "result", "[Executed. Check code return value manually if needed.]"
             )
             return f"Python result: {result}"
         except Exception as e:
             return f"[Python execution error: {e}]"

     @staticmethod
     def _audio_filename(mime: str) -> str:
         """Build an upload file name whose extension reflects ``mime``."""
         subtype = mime.partition("/")[2]
         if subtype.startswith("x-"):
             subtype = subtype[2:]
         extension = {"mpeg": "mp3", "wave": "wav"}.get(subtype, subtype) or "mp3"
         return f"audio.{extension}"

     def __call__(self, question: str, task_id: "str | None" = None) -> str:
         """Answer ``question``, using the task's attached file if there is one.

         Images go to ``gpt-4o`` as a data URL. Audio is transcribed with
         ``whisper-1`` and the transcript is appended to the conversation.
         Python files are executed, text/CSV is truncated to 2000 characters,
         and PDFs only produce a placeholder note. Returns the stripped model
         reply, or ``"[Agent error: ...]"`` if an OpenAI call raises.
         """
         image = None
         audio = None
         mime = ""
         tool_context = ""

         if task_id:
             _, file_bytes, file_type = self.fetch_file(task_id)
             if file_bytes is None:
                 tool_context = file_type  # error message
             else:
                 # Drop parameters such as "; charset=utf-8" before matching.
                 mime = file_type.split(";", 1)[0].strip().lower()
                 if "image" in mime:
                     image = base64.b64encode(file_bytes).decode("utf-8")
                 elif "audio" in mime:
                     audio = file_bytes
                 elif "python" in mime:
                     tool_context = self._run_python(file_bytes)
                 elif "text" in mime or "csv" in mime:
                     # Undecodable bytes must not abort the whole answer.
                     tool_context = file_bytes.decode("utf-8", errors="replace")[:2000]
                 elif "pdf" in mime:
                     tool_context = "[PDF file detected. OCR not yet implemented.]"

         messages = [
             {"role": "system", "content": self.instructions},
             {"role": "user", "content": f"{tool_context}\n\nQUESTION: {question}\nANSWER:"},
         ]

         try:
             if image:
                 response = self.client.chat.completions.create(
                     model="gpt-4o",
                     messages=[
                         {"role": "system", "content": self.instructions},
                         {
                             "role": "user",
                             "content": [
                                 {"type": "text", "text": question},
                                 {
                                     "type": "image_url",
                                     "image_url": {
                                         # Use the served MIME type, not a fixed PNG.
                                         "url": f"data:{mime};base64,{image}",
                                         "detail": "auto",
                                     },
                                 },
                             ],
                         },
                     ],
                 )
             else:
                 if audio:
                     # A bare BytesIO has no file name; the extension tells
                     # the transcription endpoint which audio format it is.
                     upload = io.BytesIO(audio)
                     upload.name = self._audio_filename(mime)
                     transcript = self.client.audio.transcriptions.create(
                         model="whisper-1",
                         file=upload,
                         response_format="text",
                     )
                     messages.append(
                         {"role": "user", "content": f"Transcript: {transcript.strip()}"}
                     )
                 response = self.client.chat.completions.create(
                     model="gpt-4-turbo",
                     messages=messages,
                     temperature=0.0,
                 )
             return response.choices[0].message.content.strip()
         except Exception as e:
             return f"[Agent error: {e}]"