File size: 4,273 Bytes
fde864c
 
 
c09e7e2
 
fde864c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c09e7e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fde864c
c09e7e2
 
 
 
 
 
 
 
 
 
 
 
 
fde864c
 
 
 
 
 
 
 
bd702b9
 
 
e225216
bd702b9
 
fde864c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import re
import requests
import tempfile
import pandas as pd
from openai import OpenAI
from duckduckgo_search import DDGS

# System instructions sent to the LLM. They ask the model to reason freely and
# then end with a single "FINAL ANSWER: ..." line, which BasicAgent.__call__
# parses out of the completion. The wording is part of the runtime contract
# with the model, so the sentences below must not be edited casually.
PROMPT = " ".join(
    (
        "You are a general AI assistant. I will ask you a question.",
        "Report your thoughts, and finish your answer with the following template:",
        "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.",
        "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.",
        "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
        "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
    )
)

class BasicAgent:
    """Answer a question using a spreadsheet heuristic, web search and an LLM.

    Calling an instance with a question (and optionally a task id) works in
    two stages:

    1. If the task has an attached file and the question mentions a
       spreadsheet/file, the attachment is downloaded and summed with a simple
       heuristic (see ``excel_tool``).
    2. Otherwise, or if the spreadsheet path fails, DuckDuckGo results are
       fetched and handed to the chat model together with ``PROMPT``; the text
       after the model's ``FINAL ANSWER:`` marker is returned.
    """

    # Base URL of the service that serves per-task attachments under /files/<id>.
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

    # Words in a question that suggest the attachment should be read as a sheet.
    _FILE_KEYWORDS = ("excel", "spreadsheet", "file")

    # Matches the answer marker anywhere on a line, case-insensitively; the
    # capture group is the remainder of that line.
    _FINAL_ANSWER_RE = re.compile(r"final answer\s*:\s*(.*)", re.IGNORECASE)

    # Characters trimmed from both ends of an extracted answer: whitespace,
    # a trailing period, quotes, and markdown emphasis/code markers.
    _ANSWER_STRIP_CHARS = " \t\r.\"'*`"

    def __init__(self):
        """Create the OpenAI client from the ``OPENAI_API_KEY`` environment variable."""
        self.llm = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        print("BasicAgent initialized.")

    @staticmethod
    def _format_results(results) -> str:
        """Render search hits (dicts with title/href/body) as a numbered text list."""
        return "".join(
            f"{i}. {hit.get('title', '')}\n"
            f"   URL: {hit.get('href', '')}\n"
            f"   Description: {hit.get('body', '')}\n\n"
            for i, hit in enumerate(results, 1)
        )

    def web_search(self, query: str, max_results: int = 5) -> str:
        """Search DuckDuckGo and return the hits as a numbered, human-readable list.

        Args:
            query: Search query text.
            max_results: Maximum number of hits to request.

        Returns:
            The formatted hits, or an empty string when there are no results
            or the search fails. Search is best-effort: failures are reported
            on stdout and never raised to the caller.
        """
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=max_results))
            return self._format_results(results)
        except Exception as exc:  # best-effort boundary: the LLM can still answer without search
            print(f"Web search failed: {exc}")
            return ""

    @staticmethod
    def _summarize_frame(df) -> str:
        """Reduce a DataFrame to a single total, rounded to two decimals.

        If the frame has both ``Type`` and ``Sales`` columns, sum ``Sales`` for
        rows whose ``Type`` is "food" (case-insensitive, surrounding whitespace
        ignored). Otherwise sum every numeric column.
        """
        if "Type" in df.columns and "Sales" in df.columns:
            # astype(str) keeps the .str accessor valid for non-text columns.
            kinds = df["Type"].astype(str).str.strip().str.lower()
            # Coerce so stray text cells become NaN (skipped by sum) instead of
            # breaking the total.
            sales = pd.to_numeric(df.loc[kinds == "food", "Sales"], errors="coerce")
            total = sales.sum()
        else:
            total = df.select_dtypes(include="number").sum().sum()
        return f"{round(total, 2)}"

    def excel_tool(self, file_url: str) -> str:
        """Download an Excel workbook and return a summed total as a string.

        Args:
            file_url: URL of the workbook to download.

        Returns:
            The total computed by ``_summarize_frame`` for the first sheet, or
            the literal ``"Unable to read Excel file"`` when the download or
            parsing fails (the reason is printed to stdout).
        """
        excel_path = None
        try:
            r = requests.get(file_url, timeout=20)
            r.raise_for_status()
            # delete=False so the file can be reopened by pandas on every
            # platform; it is removed explicitly in the finally clause.
            with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as f:
                excel_path = f.name
                f.write(r.content)
            df = pd.read_excel(excel_path)
            return self._summarize_frame(df)
        except Exception as exc:  # best-effort: caller falls back to search + LLM
            print(f"Excel tool failed: {exc}")
            return "Unable to read Excel file"
        finally:
            if excel_path is not None:
                try:
                    os.remove(excel_path)
                except OSError:
                    # Cleanup failure must not mask the real result.
                    pass

    def fetch_file_url(self, task_id):
        """Return the attachment URL for ``task_id`` if the server has one, else ``None``.

        The URL is probed with a HEAD request (following redirects). If the
        server reports that HEAD is not supported (405/501), a streamed GET is
        used instead so the body is not downloaded just to check existence.
        Network errors are treated as "no file".
        """
        url = f"{self.DEFAULT_API_URL}/files/{task_id}"
        try:
            r = requests.head(url, timeout=5, allow_redirects=True)
            status = r.status_code
            if status in (405, 501):
                with requests.get(url, stream=True, timeout=5) as probe:
                    status = probe.status_code
        except requests.RequestException:
            return None
        return url if status == 200 else None

    @classmethod
    def _extract_final_answer(cls, text: str) -> str:
        """Return the text following the last non-empty ``FINAL ANSWER:`` marker.

        The marker may appear anywhere on a line and in any letter case. The
        last occurrence wins because the model is instructed to *finish* with
        it and may mention the phrase earlier while reasoning. If no marker
        yields a non-empty answer, ``text`` is returned unchanged.
        """
        candidates = [
            match.group(1).strip(cls._ANSWER_STRIP_CHARS)
            for match in cls._FINAL_ANSWER_RE.finditer(text)
        ]
        for candidate in reversed(candidates):
            if candidate:
                return candidate
        return text

    def __call__(self, question: str, task_id: str = None) -> str:
        """Answer ``question``, using the task's attachment when it looks relevant.

        Args:
            question: The question text.
            task_id: Optional task identifier used to look up an attached file.

        Returns:
            The spreadsheet total when the attachment path succeeds; otherwise
            the final answer extracted from the LLM completion (or the whole
            completion if no ``FINAL ANSWER:`` marker is found).
        """
        file_url = self.fetch_file_url(task_id) if task_id else None

        lowered = question.lower()
        if file_url and any(word in lowered for word in self._FILE_KEYWORDS):
            # excel_tool never raises; it signals failure via its "Unable ..."
            # message, in which case we fall through to search + LLM.
            excel_result = self.excel_tool(file_url)
            if excel_result and "unable" not in excel_result.lower():
                return excel_result

        search_snippet = self.web_search(question)
        response = self.llm.chat.completions.create(
            model="gpt-4o",
            # Instructions go in the system role; the evidence and the actual
            # question are the user turn.
            messages=[
                {"role": "system", "content": PROMPT},
                {
                    "role": "user",
                    "content": f"Web search results:\n{search_snippet}\n\nQuestion: {question}",
                },
            ],
            temperature=0.0,
            max_tokens=512,
        )
        # content can be None (e.g. refusals / filtered output); treat as empty.
        answer = (response.choices[0].message.content or "").strip()
        return self._extract_final_answer(answer)