"""Factories for a manager agent and its managed vision and web agents."""

# Import Modules
import os

import pandas as pd
import torch
from smolagents import LiteLLMModel, OpenAIServerModel
from smolagents import (
    ToolCallingAgent,
    CodeAgent,
    GoogleSearchTool,
    VisitWebpageTool,
    WikipediaSearchTool,
    FinalAnswerTool,
    PythonInterpreterTool,
)

# Custom Modules
from tooling import (
    vision_language_tool,
    read_excel_tool,
    speech_to_text_tool,
    youtube_captions_tool,
    DuckDuckGoSearchTool,
)

# Agent Model
# NOTE: os.getenv returns None when OPENAI_KEY is unset; how the model
# client reacts to a missing key is not visible here -- verify at deploy.
model = OpenAIServerModel(model_id="gpt-4.1", api_key=os.getenv("OPENAI_KEY"))

# Modules the code agents may import in generated code. Shared by the web
# and manager agents so both lists stay in sync. Entries are importable
# module names, hence 'sklearn' rather than the 'scikit-learn' package name
# (which can never match an ``import`` statement).
_AUTHORIZED_IMPORTS = (
    "json",
    "pandas",
    "re",
    "bs4",
    "requests",
    "numpy",
    "math",
    "xml",
    "sklearn",
)


# Create Vision Agent
def create_vision_agent() -> ToolCallingAgent:
    """Build the ``vision_agent``.

    Returns:
        A ``ToolCallingAgent`` using the module-level ``model`` with
        ``FinalAnswerTool`` and ``vision_language_tool`` as its tools.
    """
    return ToolCallingAgent(
        model=model,
        tools=[FinalAnswerTool(), vision_language_tool],
        name="vision_agent",
        planning_interval=2,
        verbosity_level=2,
        max_steps=6,
        provide_run_summary=True,
        # Runtime text kept verbatim (including wording) from the original.
        description=(
            " A team member that will use a vision language model to answer "
            "a question about an image. Ask him for all your questions that "
            "require answering a question about a picture or image. Provide "
            "the file name of the image and the specific question that you "
            "want it answer. "
        ),
    )


# Create Web Agent
def create_web_agent() -> CodeAgent:
    """Build the ``web_agent``.

    Returns:
        A ``CodeAgent`` using the module-level ``model`` with Google,
        DuckDuckGo, webpage-visit and Wikipedia tools plus
        ``FinalAnswerTool``.
    """
    web_tools = [
        FinalAnswerTool(),
        GoogleSearchTool(),
        DuckDuckGoSearchTool(),
        VisitWebpageTool(max_output_length=100000),
        WikipediaSearchTool(
            user_agent="FinalAssignmentResearchBot (myemail@example.com)",
            language="en",
            content_type="text",
            extract_format="WIKI",
        ),
    ]
    return CodeAgent(
        model=model,
        tools=web_tools,
        # Fresh list per agent so neither agent can mutate the shared tuple.
        additional_authorized_imports=list(_AUTHORIZED_IMPORTS),
        name="web_agent",
        planning_interval=3,
        verbosity_level=2,
        max_steps=20,
        provide_run_summary=True,
        # Runtime text kept verbatim from the original.
        description=(
            " A team member that will use various tools to search for "
            "websites, to visit websites and to parse and read information "
            "from websites. Every question that requires to retrieve "
            "information from the internet to be answered must be answered "
            "by using the web_agent. \n"
            "The gathered information to create the final answer will be "
            "reported back to the manager_agent. "
        ),
    )


# Create Manager Agent
def create_manager_agent() -> CodeAgent:
    """Build the ``manager_agent`` together with its managed agents.

    A new vision agent and a new web agent are created on every call and
    passed to the manager as ``managed_agents``.

    Returns:
        A ``CodeAgent`` using the module-level ``model`` with the Python
        interpreter, speech-to-text, YouTube captions and Excel tools plus
        ``FinalAnswerTool``.
    """
    # Create Managed Agents
    vision_agent = create_vision_agent()
    web_agent = create_web_agent()

    # Return Manager Agent
    return CodeAgent(
        model=model,
        tools=[
            FinalAnswerTool(),
            PythonInterpreterTool(),
            speech_to_text_tool,
            youtube_captions_tool,
            read_excel_tool,
        ],
        name="manager_agent",
        additional_authorized_imports=list(_AUTHORIZED_IMPORTS),
        planning_interval=3,
        verbosity_level=2,
        stream_outputs=True,
        max_steps=25,
        provide_run_summary=True,
        managed_agents=[vision_agent, web_agent],
    )