dawid-lorek committed on
Commit
239dbcb
·
verified ·
1 Parent(s): eca84dc

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +53 -103
agent.py CHANGED
@@ -1,106 +1,56 @@
1
  import os
2
- import re
3
- import json
4
- import pandas as pd
5
- import tempfile
6
- import openpyxl
7
- import whisper
8
-
9
- from llama_index.llms.openai import OpenAI
10
- from llama_index.core.agent import FunctionCallingAgent
11
- from llama_index.core.tools import FunctionTool
12
-
13
- # === TOOL FUNCTIONS ===
14
-
15
def reverse_sentence(sentence: str) -> str:
    """Return *sentence* with its characters in reverse order."""
    return "".join(reversed(sentence))
18
-
19
def extract_vegetables_from_list(grocery_list: str) -> str:
    """Extract botanically valid vegetables from a comma-separated list.

    Matching is case-insensitive and whitespace around each item is
    ignored; the result is a deduplicated, alphabetically sorted,
    comma-separated string.
    """
    known_vegetables = {
        "broccoli", "celery", "green beans", "lettuce", "sweet potatoes"
    }
    found = {
        entry.strip().lower()
        for entry in grocery_list.split(",")
        if entry.strip().lower() in known_vegetables
    }
    return ", ".join(sorted(found))
27
-
28
def commutative_subset_hint(_: str) -> str:
    """Static helper for commutative subset fallback.

    Always returns the fixed answer string regardless of input.
    """
    fixed_answer = ("a", "b", "c")
    return ", ".join(fixed_answer)
31
-
32
def convert_table_if_detected(question: str, file_context: str) -> str:
    """Detect a '* on the set' operation table and append its analysis.

    When *question* mentions an operation table and *file_context* is
    non-empty, parse the pipe-delimited table, find every element involved
    in a non-commutative pair (a*b != b*a), and append the sorted result
    to the context. Parse failures are appended as a note instead of
    raising (best-effort behavior).
    """
    # Guard clauses: nothing to do without the trigger phrase or content.
    if "* on the set" not in question or not file_context:
        return file_context
    try:
        # Keep pipe-delimited lines; the '*' check drops a '|*|'-style
        # corner/header row whose first two characters contain '*'.
        rows = [
            raw.strip()
            for raw in file_context.splitlines()
            if '|' in raw and '*' not in raw[:2]
        ]
        # Split each row into cells, discarding the empty edge fragments
        # produced by the leading/trailing pipes.
        cells = [re.split(r'\|+', row)[1:-1] for row in rows]
        header, body = cells[0], cells[1:]
        labels = [entry[0] for entry in body]
        values = [entry[1:] for entry in body]
        table = pd.DataFrame(values, index=labels, columns=header)
        offenders = set()
        for left in table.index:
            for right in table.columns:
                if table.at[left, right] != table.at[right, left]:
                    offenders.update((left, right))
        summary = ", ".join(sorted(offenders))
        file_context += f" [Parsed Non-Commutative Set] {summary}"
    except Exception as e:
        file_context += f" [Table Parse Error] {e}"
    return file_context
57
-
58
def transcribe_audio(file_path: str) -> str:
    """Transcribe an audio file to text with OpenAI Whisper (base model).

    Loads the model on every call, which keeps the function stateless at
    the cost of reload time per invocation.
    """
    asr_model = whisper.load_model("base")
    transcription = asr_model.transcribe(file_path)
    return transcription['text']
63
-
64
def extract_excel_total_food_sales(file_path: str) -> str:
    """Sum the sales amounts of all 'food'-category rows in an Excel file.

    Reads the active sheet, skipping the header row; expects the category
    in column B and the amount in column C.

    Fixes over the previous version:
    - rows whose amount cell is empty or non-numeric are skipped instead
      of crashing on ``float(None)``;
    - short rows are ignored instead of raising ``IndexError``;
    - the workbook is closed even when iteration fails.

    Returns the total formatted as a dollar string, e.g. "$12.34".
    """
    wb = openpyxl.load_workbook(file_path)
    try:
        sheet = wb.active
        total = 0.0
        for row in sheet.iter_rows(min_row=2, values_only=True):
            if len(row) < 3:
                continue  # trailing/partial rows have no amount column
            category, amount = row[1], row[2]
            if isinstance(category, str) and 'food' in category.lower():
                try:
                    total += float(amount)
                except (TypeError, ValueError):
                    continue  # empty or non-numeric amount cell
        return f"${total:.2f}"
    finally:
        wb.close()  # release the file handle even on error
74
-
75
# === LLM SETUP ===
# Shared LLM instance; constructed once at import time (module-level side
# effect — requires OpenAI credentials to be configured in the environment).
llm = OpenAI(model="gpt-4o")

# === TOOLS ===
# Function tools exposed to the agent; each function's signature and
# docstring become the tool schema the LLM sees.
# NOTE(review): convert_table_if_detected, transcribe_audio and
# extract_excel_total_food_sales are defined above but NOT registered here —
# confirm whether that is intentional.
tools = [
    FunctionTool.from_defaults(fn=reverse_sentence),
    FunctionTool.from_defaults(fn=extract_vegetables_from_list),
    FunctionTool.from_defaults(fn=commutative_subset_hint),
]

# Module-level function-calling agent used by answer_question() below.
agent = FunctionCallingAgent.from_tools(
    tools=tools,
    llm=llm,
    system_prompt=(
        "You are a strict and factual research agent solving GAIA benchmark questions. "
        "You must answer precisely, based only on available information. "
        "Never hallucinate, and always return concise, well-formatted answers. "
        "Use tools where necessary, and return plain text only — no extra explanation."
    ),
    verbose=True
)
96
-
97
# === MAIN AGENT CALL ===
def answer_question(question: str, task_id: str = None, file_content: str = "") -> str:
    """Answer a GAIA benchmark question with the function-calling agent.

    Parameters
    ----------
    question : str
        The benchmark question text.
    task_id : str, optional
        Unused here; kept for interface compatibility with callers.
    file_content : str, optional
        Raw text of an attached file, if any.

    Returns
    -------
    str
        The agent's answer, or an "[ERROR] ..." string on failure.
    """
    file_context = convert_table_if_detected(question, file_content or "")

    # BUG FIX: the parsed file context was previously computed and then
    # discarded; include it in the query so the agent can actually use it.
    query = f"{question}\n\nContext:\n{file_context}" if file_context else question

    try:
        response = agent.get_response_sync(query)
        # Response objects may expose .text; fall back to str() otherwise.
        return response.text if hasattr(response, "text") else str(response)
    except Exception as e:
        return f"[ERROR] {e}"
 
1
  import os
2
+ import requests
3
+ from openai import OpenAI
4
+
5
class GaiaAgent:
    """Minimal GAIA-benchmark agent.

    Optionally downloads the task's attached file from the scoring
    service, then asks an OpenAI chat model for a step-by-step answer
    that ends with 'Final Answer: <answer>'.
    """

    def __init__(self):
        # API key is read from the environment; never hard-code it.
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.instructions = (
            "You are a top-tier research assistant for the GAIA benchmark. "
            "You analyze documents, reason step by step, and always provide a single, concise, and correct answer. "
            "If a file is provided, extract all relevant information. Use only information from the question and file. "
            "Show your reasoning before the answer, but end with 'Final Answer: <your answer>'."
        )
        # Base URL of the course scoring service that hosts task files.
        self.api_url = "https://agents-course-unit4-scoring.hf.space"

    def fetch_file_content(self, task_id: str) -> str:
        """Download the task's attachment and return it as text.

        Best-effort: returns a bracketed placeholder for unsupported
        content types and a bracketed error note on download failure,
        so the caller can always keep going.
        """
        try:
            url = f"{self.api_url}/files/{task_id}"
            response = requests.get(url, timeout=15)
            response.raise_for_status()

            content_type = response.headers.get("Content-Type", "")
            if any(t in content_type for t in ["text", "csv", "json"]):
                return response.text[:6000]  # cap context size for the prompt
            elif "application/pdf" in content_type:
                return "[PDF file detected. Use a PDF parser to extract text.]"
            else:
                return f"[Unsupported file type: {content_type}]"
        except Exception as e:
            return f"[Error downloading or reading file: {e}]"

    def __call__(self, question: str, task_id: str = None) -> str:
        """Answer *question*, optionally grounded on the task's file."""
        file_context = ""
        if task_id:
            file_context = self.fetch_file_content(task_id)
            if file_context:
                file_context = f"Here is the related file content:\n{file_context}\n"

        # FIX: the instructions were previously duplicated — sent both as
        # the system message and prepended to the user prompt. Send them
        # once (system role) and keep only task content in the user turn.
        prompt = (
            f"{file_context}"
            f"Question: {question}\n"
            "Show your reasoning step by step, then provide the final answer as 'Final Answer: <answer>'."
        )

        response = self.client.chat.completions.create(
            model="gpt-4o",  # most capable general model for accuracy
            messages=[
                {"role": "system", "content": self.instructions},
                {"role": "user", "content": prompt},
            ],
            temperature=0.0,  # deterministic answers for benchmarking
            max_tokens=1024,
        )

        return response.choices[0].message.content.strip()