robinsmits's picture
Code Update: 11 out of 20 correct.
4483b98
raw
history blame
4.78 kB
# Import Modules
import os
import pandas as pd
import torch
from smolagents import LiteLLMModel, OpenAIServerModel
from smolagents import (ToolCallingAgent,
CodeAgent,
GoogleSearchTool,
VisitWebpageTool,
WikipediaSearchTool,
FinalAnswerTool,
PythonInterpreterTool)
# Custom Modules
from tooling import (vision_language_tool,
read_excel_tool,
speech_to_text_tool,
youtube_captions_tool,
DuckDuckGoSearchTool)
# Agent Model
# Single model instance shared by every agent built in this module.
# The API key is read from the OPENAI_KEY environment variable (None if unset).
model = OpenAIServerModel(
    api_key=os.environ.get('OPENAI_KEY'),
    model_id="gpt-4.1",
)
# Create Vision Agent
def create_vision_agent():
    """Build the image question-answering sub-agent.

    Returns a ``ToolCallingAgent`` named ``vision_agent`` that uses the
    module-level ``model`` and is equipped with ``FinalAnswerTool`` and the
    project's ``vision_language_tool``.
    """
    # Tools available to the vision agent.
    vision_tools = [FinalAnswerTool(), vision_language_tool]

    # Text shown to the managing agent so it knows when to delegate here.
    vision_description = """
A team member that will use a vision language model to answer a question about an image.
Ask him for all your questions that require answering a question about a picture or image.
Provide the file name of the image and the specific question that you want it answer.
"""

    return ToolCallingAgent(
        model=model,
        tools=vision_tools,
        name='vision_agent',
        description=vision_description,
        max_steps=6,
        planning_interval=2,
        verbosity_level=2,
        provide_run_summary=True,
    )
# Create Web Agent
def create_web_agent() -> CodeAgent:
    """Build the web-research sub-agent.

    Returns a ``CodeAgent`` named ``web_agent`` that uses the module-level
    ``model`` and is equipped with Google search, DuckDuckGo search, a webpage
    visitor (``max_output_length`` of 100000) and an English Wikipedia search
    tool, plus ``FinalAnswerTool``.
    """
    web_tools = [
        FinalAnswerTool(),
        GoogleSearchTool(),
        DuckDuckGoSearchTool(),
        VisitWebpageTool(max_output_length=100000),
        WikipediaSearchTool(
            user_agent="FinalAssignmentResearchBot ([email protected])",
            language="en",
            content_type="text",
            extract_format="WIKI",
        ),
    ]

    # Modules the agent's generated code is allowed to import.
    # NOTE: entries must be importable module names. scikit-learn is imported
    # as `sklearn`; the PyPI name 'scikit-learn' is not a valid module name
    # and would never match an import statement.
    authorized_imports = [
        'json',
        'pandas',
        're',
        'bs4',
        'requests',
        'numpy',
        'math',
        'xml',
        'sklearn',
    ]

    # Text shown to the managing agent so it knows when to delegate here.
    web_description = """
A team member that will use various tools to search for websites, to visit websites and to parse and read information from websites.
Every question that requires to retrieve information from the internet to be answered must be answered by using the web_agent.
The gathered information to create the final answer will be reported back to the manager_agent.
"""

    return CodeAgent(
        model=model,
        tools=web_tools,
        additional_authorized_imports=authorized_imports,
        name='web_agent',
        planning_interval=3,
        verbosity_level=2,
        max_steps=20,
        provide_run_summary=True,
        description=web_description,
    )
# Create Manager Agent
def create_manager_agent() -> CodeAgent:
    """Build the top-level manager agent together with its managed sub-agents.

    Each call creates a new vision agent and a new web agent (via
    ``create_vision_agent`` and ``create_web_agent``) and hands them to a
    ``CodeAgent`` named ``manager_agent`` that uses the module-level ``model``.
    The manager itself is equipped with a Python interpreter tool and the
    project's speech-to-text, YouTube-captions and Excel-reading tools, plus
    ``FinalAnswerTool``.
    """
    # Sub-agents the manager can delegate to (vision first, then web).
    sub_agents = [create_vision_agent(), create_web_agent()]

    manager_tools = [
        FinalAnswerTool(),
        PythonInterpreterTool(),
        speech_to_text_tool,
        youtube_captions_tool,
        read_excel_tool,
    ]

    # Modules the manager's generated code is allowed to import.
    # NOTE: entries must be importable module names. scikit-learn is imported
    # as `sklearn`; the PyPI name 'scikit-learn' is not a valid module name
    # and would never match an import statement.
    authorized_imports = [
        'json',
        'pandas',
        're',
        'bs4',
        'requests',
        'numpy',
        'math',
        'xml',
        'sklearn',
    ]

    return CodeAgent(
        model=model,
        tools=manager_tools,
        name='manager_agent',
        additional_authorized_imports=authorized_imports,
        planning_interval=3,
        verbosity_level=2,
        stream_outputs=True,
        max_steps=25,
        provide_run_summary=True,
        managed_agents=sub_agents,
    )