File size: 8,047 Bytes
89cdc9f
 
 
 
 
 
 
 
 
67d6957
89cdc9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc70c2d
 
 
89cdc9f
 
 
 
 
 
 
 
 
 
dc70c2d
 
89cdc9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67d6957
dc70c2d
89cdc9f
67d6957
89cdc9f
 
 
 
 
 
dc70c2d
89cdc9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import base64
import logging
import os
import re
from io import BytesIO
from typing import Any

from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    OpenAIServerModel,
    VisitWebpageTool,
    WikipediaSearchTool,
    tool,
)

# Instructions handed to the CodeAgent (see BasicAgent.__init__, `instructions=`).
# The "FINAL ANSWER:" template described here is what BasicAgent.formatting
# looks for when extracting the answer from a response.
system_prompt = """You are an AI Agent that is tasked to answer questions in a concise and accurate manner.
I will ask you a question and provide you with additional context if available.
Context can be in the form of Data(data), Code(code), Audio(audio), or Images(image_url).
Context is provided by specifying the content type followed by the content itself.

For example: code: print("Hello World") or Data: [1, 2, 3, 4, 5] or audio: [base64 encoded audio] or image_url: [base64 encoded image].

YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
DO NOT use formatting such as bold, italics, or code blocks in your final answer.
DO NOT use sources, references, or abbreviations in your final answer.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
If you are asked for a specific number format, follow the instructions carefully.
If you are asked for a number only answer with the number itself, without any additional text or formatting.
If you are asked for a string only answer with the string itself, without any additional text or formatting.
If you are asked for a list only answer with the list itself, without any additional text or formatting.

Think step by step. Report your thoughts.
Finish your answer with the following template: 
FINAL ANSWER: [YOUR FINAL ANSWER].

For example, if the question is "What is the capital of France?", you should answer:
FINAL ANSWER: Paris

If the question is "What is 2 + 2?", you should answer:
FINAL ANSWER: 4

If the question is "What is 1 divided by 2, answer with 2 digits after the decimal point?", you should answer:
FINAL ANSWER: 0.50

If the question is "What is 10 * 10 with four digits after the decimal point?", you should answer:
FINAL ANSWER: 100.0000
"""


# def is_correct_format(answer: str, _) -> bool:
#     """Check if the answer contains a final answer in the correct format.

#     Args:
#         answer: The answer to check.

#     Returns:
#         True if the answer contains a final answer, False otherwise.
#         This ensures the final output is in the correct format.
#     """
#     return (
#         "ANSWER:" in answer
#         or "FINAL ANSWER:" in answer
#         or "Answer:" in answer
#         or "Final Answer:" in answer
#         or "answer:" in answer
#         or "final answer:" in answer
#         or "answer:" in answer.lower()
#         or "final answer:" in answer.lower()
#     )

@tool
def wikipedia_suggested_page(query: str) -> str:
    """Search Wikipedia for suggested pages based on the query.

    Args:
        query: The search query. The query should be coarse and not provide too many details.
            E.g. "Python programming" or "Artificial Intelligence".

    Returns:
        Wikipedia's spelling suggestion for the query (when it has one) followed by
        the matching page titles, one entry per line. If nothing is found or the
        lookup fails, a short explanatory message is returned instead.
    """
    # Imported lazily, as in the rest of this file's tools, so the module can be
    # loaded without the `wikipedia` package being touched until the tool runs.
    from wikipedia import search

    try:
        # With suggestion=True, search() returns (list_of_titles, suggestion_or_None).
        # The previous implementation used suggest(), which yields only the single
        # spelling suggestion and None for most well-formed queries.
        titles, suggestion = search(query, suggestion=True)
    except Exception as e:
        logging.error(f"Error fetching Wikipedia suggestions for '{query}': {e}")
        return f"Error fetching suggestions: {e}"

    candidates = list(titles)
    if suggestion and suggestion not in candidates:
        # Keep the spelling suggestion first: it is what this tool used to return.
        candidates.insert(0, suggestion)
    if not candidates:
        # Always hand the agent a string, never None.
        return f"No suggested pages found for '{query}'"
    return "\n".join(candidates)


@tool
def wikipedia_page(title: str) -> str:
    """Search Wikipedia for a page based on the title.

    Args:
        title: The title of the Wikipedia page to search for.

    Returns:
        The content of the Wikipedia page.
    """
    # Function-scope import: the dependency is only needed when the tool runs.
    import wikipedia

    try:
        # auto_suggest=True lets the library resolve the title via its own
        # suggestion lookup before loading the page.
        article = wikipedia.page(title, auto_suggest=True)
        text = article.content
    except Exception as e:
        # Any failure is logged and reported back to the agent as plain text
        # instead of being raised.
        logging.error(f"Error fetching Wikipedia page for '{title}': {e}")
        return f"Error fetching page: {e}"
    return text


class BasicAgent:
    def __init__(self):
        model = OpenAIServerModel(
            model_id="gpt-4o-mini",
            api_key=os.getenv("OPENAI_API_KEY"),
            temperature=0.0,
        )
        search = DuckDuckGoSearchTool(max_results=5)
        # speech_to_text = SpeechToTextTool()
        visitor = VisitWebpageTool(max_output_length=4000)
        wiki_search = WikipediaSearchTool()
        self.agent = CodeAgent(
            max_steps=10,
            verbosity_level=0,
            tools=[
                search,
                # speech_to_text,
                visitor,
                wiki_search,
                wikipedia_suggested_page,
                wikipedia_page,
            ],
            model=model,
            instructions=system_prompt,
            additional_authorized_imports=["pandas", "numpy"],
            use_structured_outputs_internally=True,
            add_base_tools=True,
        )
        logging.info(
            f"System prompt set for BasicAgent: {self.agent.memory.system_prompt}"
        )

    def __call__(self, question: str, content, content_type) -> Any:
        match content_type:
            case "xlsx":
                additional_args = {"data": content}
            case "py":
                additional_args = {"code": content}
            case "audio":
                additional_args = {"audio": content}
            case "png":
                buffer = BytesIO()
                content.save(buffer, format="PNG")
                buffer.seek(0)
                image_content = (
                    "data:image/png;base64,"
                    + base64.b64encode(buffer.getvalue()).decode("utf-8")
                )
                additional_args = {"image_url": image_content}
            case _:
                additional_args = None
        response = self.agent.run(
            question,
            additional_args=additional_args,
            images=[content] if content_type == "png" else None,
            reset=True,
        )
        return response

    @staticmethod
    def formatting(answer: str) -> str:
        """Extract the final answer from the response."""
        if "FINAL ANSWER:" in answer:
            answer = answer.split("FINAL ANSWER:")[-1].strip()
        if "ANSWER:" in answer:
            answer = answer.split("ANSWER:")[-1].strip()
        if "Answer:" in answer:
            answer = answer.split("Answer:")[-1].strip()
        if "Final Answer:" in answer:
            answer = answer.split("Final Answer:")[-1].strip()
        if "answer:" in answer.lower():
            answer = answer.split("answer:")[-1].strip()
        if "final answer:" in answer.lower():
            answer = answer.split("final answer:")[-1].strip()
        if "answer is:" in answer.lower():
            answer = answer.split("answer is:")[-1].strip()
        if "is:" in answer.lower():
            answer = answer.split("is:")[-1].strip()
        if "**" in answer:
            answer = answer.split("**")[-1].strip().replace("**", "")
        if "```" in answer:
            answer = answer.split("```")[-1].strip().replace("```", "")
        if "```python" in answer:
            answer = answer.split("```python")[-1].strip().replace("```", "")
        if "```json" in answer:
            answer = answer.split("```json")[-1].strip().replace("```", "")
        if "```yaml" in answer:
            answer = answer.split("```yaml")[-1].strip().replace("```", "")
        if "```txt" in answer:
            answer = answer.split("```txt")[-1].strip().replace("```", "")
        answer = answer.capitalize()
        
        answer = answer.replace('"', '').strip()
        answer = answer.replace("'", "").strip()
        answer = answer.replace("[", "").replace("]", "").strip()
        return answer.strip()  # Fallback to return the whole answer if no specific format found