Spaces:
Sleeping
Sleeping
File size: 8,047 Bytes
89cdc9f 67d6957 89cdc9f dc70c2d 89cdc9f dc70c2d 89cdc9f 67d6957 dc70c2d 89cdc9f 67d6957 89cdc9f dc70c2d 89cdc9f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
import base64
import logging
import os
from io import BytesIO
from typing import Any
from smolagents import (
CodeAgent,
DuckDuckGoSearchTool,
OpenAIServerModel,
VisitWebpageTool,
WikipediaSearchTool,
tool,
)
# Instruction text for the agent: BasicAgent passes this string to CodeAgent
# through the `instructions=` argument. It explains how extra context is
# labelled (data / code / audio / image_url) and fixes the reply template
# "FINAL ANSWER: ..." that BasicAgent.formatting looks for when extracting
# the answer.
# NOTE(review): the last example below contains the typo "quesion". It is left
# untouched here because this literal is runtime text, not a comment.
system_prompt = """You are an AI Agent that is tasked to answer questions in a concise and accurate manner.
I will ask you a question and provide you with additional context if available.
Context can be in the form of Data(data), Code(code), Audio(audio), or Images(image_url).
Context is provided by specifying the content type followed by the content itself.
For example: code: print("Hello World") or Data: [1, 2, 3, 4, 5] or audio: [base64 encoded audio] or image_url: [base64 encoded image].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
DO NOT use formatting such as bold, italics, or code blocks in your final answer.
DO NOT use sources, references, or abbreviations in your final answer.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
If you are asked for a specific number format, follow the instructions carefully.
If you are asked for a number only answer with the number itself, without any additional text or formatting.
If you are asked for a string only answer with the string itself, without any additional text or formatting.
If you are asked for a list only answer with the list itself, without any additional text or formatting.
Think step by step. Report your thoughts.
Finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
For example, if the question is "What is the capital of France?", you should answer:
FINAL ANSWER: Paris
If the question is "What is 2 + 2?", you should answer:
FINAL ANSWER: 4
If the question is "What is 1 divided by 2, answer with 2 digits after the decimal point?", you should answer:
FINAL ANSWER: 0.50
If the quesion is "What is 10 * 10 with four digits after the decimal point?", you should answer:
FINAL ANSWER: 100.0000
"""
# def is_correct_format(answer: str, _) -> bool:
# """Check if the answer contains a final answer in the correct format.
# Args:
# answer: The answer to check.
# Returns:
# True if the answer contains a final answer, False otherwise.
# This ensures the final output is in the correct format.
# """
# return (
# "ANSWER:" in answer
# or "FINAL ANSWER:" in answer
# or "Answer:" in answer
# or "Final Answer:" in answer
# or "answer:" in answer
# or "final answer:" in answer
# or "answer:" in answer.lower()
# or "final answer:" in answer.lower()
# )
@tool
def wikipedia_suggested_page(query: str) -> str:
    """Ask Wikipedia for a suggested page title based on the query.

    Args:
        query: The search query. The query should be coarse and not provide too many details.
            E.g. "Python programming" or "Artificial Intelligence".

    Returns:
        The page title Wikipedia suggests for the query, or a short message
        when there is no suggestion or the lookup fails.
    """
    from wikipedia import suggest

    try:
        suggestion = suggest(query)
    except Exception as e:
        # Tool boundary: report the failure to the agent as text instead of
        # letting the exception abort the whole run.
        logging.error(f"Error fetching Wikipedia suggestions for '{query}': {e}")
        return f"Error fetching suggestions: {e}"

    # `suggest` yields None when Wikipedia has nothing to propose; the tool is
    # declared to return a string, so turn that case into an explicit message.
    if not suggestion:
        return f"No Wikipedia suggestion found for '{query}'"
    return suggestion
@tool
def wikipedia_page(title: str) -> str:
    """Search Wikipedia for a page based on the title.

    Args:
        title: The title of the Wikipedia page to search for.

    Returns:
        The content of the Wikipedia page.
    """
    # NOTE: the docstring wording is kept as-is on purpose; the @tool decorator
    # appears to use it as the tool description shown to the model.
    from wikipedia import page

    try:
        # Both the lookup and the `.content` access stay inside the guard so
        # that any failure is reported back as text.
        article = page(title, auto_suggest=True)
        text = article.content
    except Exception as exc:
        logging.error(f"Error fetching Wikipedia page for '{title}': {exc}")
        return f"Error fetching page: {exc}"
    return text
class BasicAgent:
def __init__(self):
model = OpenAIServerModel(
model_id="gpt-4o-mini",
api_key=os.getenv("OPENAI_API_KEY"),
temperature=0.0,
)
search = DuckDuckGoSearchTool(max_results=5)
# speech_to_text = SpeechToTextTool()
visitor = VisitWebpageTool(max_output_length=4000)
wiki_search = WikipediaSearchTool()
self.agent = CodeAgent(
max_steps=10,
verbosity_level=0,
tools=[
search,
# speech_to_text,
visitor,
wiki_search,
wikipedia_suggested_page,
wikipedia_page,
],
model=model,
instructions=system_prompt,
additional_authorized_imports=["pandas", "numpy"],
use_structured_outputs_internally=True,
add_base_tools=True,
)
logging.info(
f"System prompt set for BasicAgent: {self.agent.memory.system_prompt}"
)
def __call__(self, question: str, content, content_type) -> Any:
match content_type:
case "xlsx":
additional_args = {"data": content}
case "py":
additional_args = {"code": content}
case "audio":
additional_args = {"audio": content}
case "png":
buffer = BytesIO()
content.save(buffer, format="PNG")
buffer.seek(0)
image_content = (
"data:image/png;base64,"
+ base64.b64encode(buffer.getvalue()).decode("utf-8")
)
additional_args = {"image_url": image_content}
case _:
additional_args = None
response = self.agent.run(
question,
additional_args=additional_args,
images=[content] if content_type == "png" else None,
reset=True,
)
return response
@staticmethod
def formatting(answer: str) -> str:
"""Extract the final answer from the response."""
if "FINAL ANSWER:" in answer:
answer = answer.split("FINAL ANSWER:")[-1].strip()
if "ANSWER:" in answer:
answer = answer.split("ANSWER:")[-1].strip()
if "Answer:" in answer:
answer = answer.split("Answer:")[-1].strip()
if "Final Answer:" in answer:
answer = answer.split("Final Answer:")[-1].strip()
if "answer:" in answer.lower():
answer = answer.split("answer:")[-1].strip()
if "final answer:" in answer.lower():
answer = answer.split("final answer:")[-1].strip()
if "answer is:" in answer.lower():
answer = answer.split("answer is:")[-1].strip()
if "is:" in answer.lower():
answer = answer.split("is:")[-1].strip()
if "**" in answer:
answer = answer.split("**")[-1].strip().replace("**", "")
if "```" in answer:
answer = answer.split("```")[-1].strip().replace("```", "")
if "```python" in answer:
answer = answer.split("```python")[-1].strip().replace("```", "")
if "```json" in answer:
answer = answer.split("```json")[-1].strip().replace("```", "")
if "```yaml" in answer:
answer = answer.split("```yaml")[-1].strip().replace("```", "")
if "```txt" in answer:
answer = answer.split("```txt")[-1].strip().replace("```", "")
answer = answer.capitalize()
answer = answer.replace('"', '').strip()
answer = answer.replace("'", "").strip()
answer = answer.replace("[", "").replace("]", "").strip()
return answer.strip() # Fallback to return the whole answer if no specific format found |