Dkapsis's picture
image analysis agent
64c3879
raw
history blame
3.27 kB
from smolagents import OpenAIServerModel, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, VisitWebpageTool
import markdownify
import tools
import prompts
# Hub model identifiers, one constant per agent role. Only two distinct
# checkpoints are shared by most roles, so each is spelled out once here.
_DEEPSEEK_R1 = "deepseek-ai/DeepSeek-R1"
_QWEN_CODER_32B = "Qwen/Qwen2.5-Coder-32B-Instruct"

# Orchestration / answer refinement.
MANAGER_MODEL = _DEEPSEEK_R1
AGENT_MODEL = _QWEN_CODER_32B
FINAL_ANSWER_MODEL = _DEEPSEEK_R1  # OpenAIServerModel

# Retrieval and media analysis.
WEB_SEARCH_MODEL = _QWEN_CODER_32B
IMAGE_ANALYSIS_MODEL = "HuggingFaceM4/idefics2-8b"
AUDIO_ANALYSIS_MODEL = _QWEN_CODER_32B
VIDEO_ANALYSIS_MODEL = _QWEN_CODER_32B
YOUTUBE_ANALYSIS_MODEL = _QWEN_CODER_32B
DOCUMENT_ANALYSIS_MODEL = _QWEN_CODER_32B

# Computation and code.
ARITHMETIC_MODEL = _QWEN_CODER_32B
CODE_GENERATION_MODEL = _QWEN_CODER_32B
CODE_EXECUTION_MODEL = _QWEN_CODER_32B
# Agents
def create_custom_web_search_agent(message):
    """Build the ``custom_web_search_agent`` CodeAgent for *message*.

    The agent's description is produced by
    ``prompts.get_web_search_prompt(message)``; it runs on
    ``InferenceClientModel(WEB_SEARCH_MODEL)``, is limited to two steps, and
    is given the search and page-visit tools from the ``tools`` module.

    Args:
        message: Passed through to ``prompts.get_web_search_prompt``.

    Returns:
        A newly constructed ``CodeAgent``.
    """
    agent_description = prompts.get_web_search_prompt(message)
    llm = InferenceClientModel(WEB_SEARCH_MODEL)
    search_tools = [tools.simple_web_search_tool, tools.visit_web_page_tool]
    return CodeAgent(
        name="custom_web_search_agent",
        description=agent_description,
        model=llm,
        max_steps=2,
        tools=search_tools,
    )
def create_simple_web_search_agent(message):
    """Build the ``simple_web_search_agent`` CodeAgent for *message*.

    Args:
        message: Passed through to ``prompts.get_web_search_prompt`` to
            produce the agent's description.

    Returns:
        A newly constructed ``CodeAgent`` backed by
        ``InferenceClientModel(WEB_SEARCH_MODEL)``, capped at two steps, with
        ``tools.simple_web_search_tool`` and ``tools.visit_web_page_tool``.
    """
    # Dict insertion order keeps the argument expressions evaluated in the
    # same sequence as a direct keyword call.
    settings = {
        "name": "simple_web_search_agent",
        "description": prompts.get_web_search_prompt(message),
        "model": InferenceClientModel(WEB_SEARCH_MODEL),
        "max_steps": 2,
        "tools": [tools.simple_web_search_tool, tools.visit_web_page_tool],
    }
    return CodeAgent(**settings)
def create_image_analysis_agent(message):
    """Build the ``image_analysis_agent`` CodeAgent for *message*.

    The agent's description comes from
    ``prompts.get_image_analysis_prompt(message)``; it runs on
    ``InferenceClientModel(IMAGE_ANALYSIS_MODEL)`` and is limited to two
    steps.

    Args:
        message: Passed through to ``prompts.get_image_analysis_prompt``.

    Returns:
        A newly constructed ``CodeAgent``.
    """
    return CodeAgent(
        name="image_analysis_agent",
        description=prompts.get_image_analysis_prompt(message),
        model=InferenceClientModel(IMAGE_ANALYSIS_MODEL),
        # The tool must be reached through the ``tools`` module: this file
        # only does ``import tools``, so a bare ``image_analysis_tool`` would
        # raise NameError on every call.
        # NOTE(review): assumes ``tools`` defines ``image_analysis_tool`` --
        # confirm against tools.py.
        tools=[tools.image_analysis_tool],
        max_steps=2,
    )
def create_manager_agent(message):
    """Build the top-level ``manager_agent`` CodeAgent for *message*.

    The manager has no tools of its own; it is given two managed agents
    (simple web search and image analysis), a ten-step budget, a planning
    interval of four, verbosity level two, and a fixed list of additionally
    authorized imports.

    Args:
        message: Forwarded to both sub-agent factories and to
            ``prompts.get_manager_prompt`` for the manager's description.

    Returns:
        A newly constructed ``CodeAgent``.
    """
    # Sub-agents are created before the manager's own model and description.
    sub_agents = [
        create_simple_web_search_agent(message),
        create_image_analysis_agent(message),
    ]

    manager_llm = InferenceClientModel(
        MANAGER_MODEL,
        provider="together",
        max_tokens=8096,
    )
    manager_description = prompts.get_manager_prompt(message)

    # Modules passed to the agent as ``additional_authorized_imports``.
    allowed_imports = [
        "requests",
        "zipfile",
        "os",
        "pandas",
        "numpy",
        "sympy",
        "json",
        "bs4",
        "pubchempy",
        "xml",
        "yahoo_finance",
        "Bio",
        "sklearn",
        "scipy",
        "pydub",
        "io",
        "PIL",
        "chess",
        "PyPDF2",
        "pptx",
        "torch",
        "datetime",
        "csv",
        "fractions",
    ]

    return CodeAgent(
        name="manager_agent",
        model=manager_llm,
        description=manager_description,
        tools=[],
        planning_interval=4,
        verbosity_level=2,
        managed_agents=sub_agents,
        max_steps=10,
        additional_authorized_imports=allowed_imports,
    )
def create_final_answer_agent(message):
    """Build the ``final_answer_agent`` CodeAgent.

    The agent has a fixed description, no tools, a two-step limit, and runs
    on ``InferenceClientModel(FINAL_ANSWER_MODEL)``.

    Args:
        message: Accepted for signature parity with the other factories;
            this function does not read it.

    Returns:
        A newly constructed ``CodeAgent``.
    """
    refinement_description = "Given a question and an initial answer, return the final refined answer following strict formatting rules."
    refinement_llm = InferenceClientModel(FINAL_ANSWER_MODEL)
    return CodeAgent(
        name="final_answer_agent",
        description=refinement_description,
        model=refinement_llm,
        max_steps=2,
        tools=[],
    )