import asyncio
import logging
import os
import re
from typing import Optional

from langfuse.llama_index import LlamaIndexInstrumentor
from llama_index.core.agent.workflow import (
    AgentOutput,
    AgentWorkflow,
    FunctionAgent,
    ToolCall,
    ToolCallResult,
)
from llama_index.core.tools import FunctionTool
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.llms.ollama import Ollama
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.tools.wikipedia import WikipediaToolSpec
from tavily import AsyncTavilyClient

from multimodality_tools import (
    _get_file,
    get_csv_analysis_tool,
    get_csv_tool,
    get_excel_analysis_tool,
    get_excel_tool,
    get_image_qa_tool,
    get_read_file_tool,
    get_transcription_tool,
)

logger = logging.getLogger(__name__)


class BasicAgent:
    """Multi-agent question answering workflow.

    A root ``MainAgent`` receives the question and may hand off to the
    specialised Wikipedia, web, audio, image and statistics agents. Every
    sub-agent can only hand back to ``MainAgent``, which produces the reply
    that ends with ``FINAL ANSWER: ...``.
    """

    def __init__(self, ollama=False, langfuse=False):
        """Build the LLM, the sub-agents and the enclosing ``AgentWorkflow``.

        Args:
            ollama: When true, use the local Ollama ``mistral:latest`` model;
                otherwise use Gemini with the key from ``GEMINI_API_KEY``.
            langfuse: When true, start a Langfuse ``LlamaIndexInstrumentor``
                that is flushed after each question.
        """
        if not ollama:
            llm = GoogleGenAI(model="gemini-2.0-flash", api_key=os.getenv("GEMINI_API_KEY"))
            # Alternative backend kept for reference:
            # llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen3-32B")  # "Qwen/Qwen2.5-Coder-32B-Instruct")
        else:
            llm = Ollama(model="mistral:latest", request_timeout=120.0)

        # Langfuse
        self.langfuse = langfuse
        if self.langfuse:
            self.instrumentor = LlamaIndexInstrumentor()
            self.instrumentor.start()

        # Initialize sub-agents
        main_agent = FunctionAgent(
            name="MainAgent",
            description=(
                "Can organize and delegate work to different agents and can compile a final answer to a question from other agents' outputs."
            ),
            system_prompt=(
                "You are a general AI assistant. I will ask you a question. "
                # The trailing space keeps "and" and "finish" from fusing into one word.
                "Report your thoughts, delegate work to other agents if necessary, and "
                "finish your answer with the following template: "
                "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number "
                "OR as few words as possible OR a comma separated list of numbers and/or "
                "strings. If you are asked for a number, don't use comma to write your "
                "number neither use units such as $ or percent sign unless specified otherwise. "
                "If you are asked for a string, don't use articles, neither abbreviations (e.g. "
                "for cities), and write the digits in plain text unless specified otherwise. If "
                "you are asked for a comma separated list, apply the above rules depending of "
                "whether the element to be put in the list is a number or a string."
            ),
            llm=llm,
            tools=[get_read_file_tool()],
            can_handoff_to=["WikiAgent", "WebAgent", "StatsAgent", "AudioAgent", "ImageAgent"],
        )

        # TODO Wikipedia tool does not return the tables from the page...
        wiki_spec = WikipediaToolSpec()
        # Second tool of the spec; presumably the search tool - verify against
        # the installed llama-index-tools-wikipedia version.
        wiki_search_tool = wiki_spec.to_tool_list()[1]

        wiki_agent = FunctionAgent(
            name="WikiAgent",
            description="Agent that can access Wikipedia to answer a question. Try using this agent if the WebAgent does not find an answer to a question.",
            system_prompt=(
                "You are a Wikipedia agent that can search Wikipedia for information and extract the relevant information to answer a question. "
                "You only give concise answers and if you don't find an answer to the given query on Wikipedia, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[wiki_search_tool],
            can_handoff_to=["MainAgent"],
        )

        # DuckDuckGo search tool; built but not handed to any agent at the
        # moment, the WebAgent uses the Tavily-backed search_web below.
        tool_spec = DuckDuckGoSearchToolSpec()
        search_tool = FunctionTool.from_defaults(tool_spec.duckduckgo_full_search)

        # In case DuckDuckGo is not good enough
        async def search_web(query: str) -> str:
            """Searches the web to answer questions."""
            client = AsyncTavilyClient(api_key=os.getenv("TAVILY"))
            return str(await client.search(query))

        web_search_agent = FunctionAgent(
            name="WebAgent",
            description="Uses the web to answer a question.",
            system_prompt=(
                "You are a Web agent that can search the Web and extract the relevant information to answer a question. "
                "You only give concise answers and if you don't find an answer to the given query with your tool, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[search_web],
            can_handoff_to=["MainAgent"],
        )

        audio_agent = FunctionAgent(
            name="AudioAgent",
            description="Uses transcription tools to analyze audio files. This agent needs a file id and an optional question as input",
            system_prompt=(
                "You are an audio agent that can transcribe an audio file identified by its id and answer questions about the transcript. "
                "You only give concise answers and if you cannot answer the given query using your tool, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[get_transcription_tool()],
            can_handoff_to=["MainAgent"],
        )

        image_agent = FunctionAgent(
            name="ImageAgent",
            description="Can respond to questions involving image understanding. This agent needs a file id and a question as an input.",
            system_prompt=(
                "You are an agent that can read images from a file identified by its id and answer questions about it. "
                # The trailing space keeps the two sentences from running together.
                "Give concise answers and only include the relevant information in you response. "
                "If you cannot answer the given query using your tool, you communicate this clearly. "
                "Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[get_image_qa_tool()],
            can_handoff_to=["MainAgent"],
        )

        stats_agent = FunctionAgent(
            name="StatsAgent",
            description="Uses statistical tools to read and analyse excel and csv files. This agent needs a file id and an optional question as an input",
            system_prompt=(
                "You are an agent that can read excel and csv files and run simple statistical analysis on them. "
                "You can use this information or the loaded file to answer questions about it. "
                "You only give concise answers and if you cannot answer the given query using your tool, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[
                get_csv_analysis_tool(),
                get_csv_tool(),
                get_excel_analysis_tool(),
                get_excel_tool(),
            ],
            can_handoff_to=["MainAgent"],
        )

        # Main AgentWorkflow
        self.agent = AgentWorkflow(
            agents=[
                main_agent,
                wiki_agent,
                web_search_agent,
                audio_agent,
                image_agent,
                stats_agent,
            ],
            root_agent=main_agent.name,
        )

    async def __call__(self, question: str, task_id: Optional[str] = None) -> str:
        """Run the workflow on ``question`` and return the extracted final answer.

        Args:
            question: The question to answer.
            task_id: Optional id of a file attached to the question. When
                ``file_exists`` confirms it, the prompt tells the agents
                which id to load.

        Returns:
            The text following the first ``FINAL ANSWER:`` marker of the
            workflow response (the whole stripped response if the marker is
            absent), or a fixed error message when no response could be
            obtained.
        """
        file_str = ""
        if file_exists(task_id):
            file_str = f'\nIf you need to load a file, do so by providing the id "{task_id}".'

        final_answer = (
            "Remember to always use the template 'FINAL ANSWER: [YOUR FINAL ANSWER]' for your final output. "
            "Always use as few words as possible for your final answer."
        )
        msg = f"{question}{file_str}\n{final_answer}"

        # Stream events
        handler = self.agent.run(user_msg=msg)

        current_agent = None
        async for event in handler.stream_events():
            if (
                hasattr(event, "current_agent_name")
                and event.current_agent_name != current_agent
            ):
                current_agent = event.current_agent_name
                print(f"\n{'='*50}")
                print(f"🤖 Agent: {current_agent}")
                print(f"{'='*50}\n")
            elif isinstance(event, AgentOutput):
                if event.response.content:
                    print("📤 Output:", event.response.content)
                if event.tool_calls:
                    print(
                        "🛠️ Planning to use tools:",
                        [call.tool_name for call in event.tool_calls],
                    )
            elif isinstance(event, ToolCallResult):
                print(f"🔧 Tool Result ({event.tool_name}):")
                print(f" Arguments: {event.tool_kwargs}")
                print(f" Output: {event.tool_output}")
            elif isinstance(event, ToolCall):
                print(f"🔨 Calling Tool: {event.tool_name}")
                print(f" With arguments: {event.tool_kwargs}")
                # Avoid ratelimits - 15 requests per minute
                # NOTE(review): confirm this pause belongs to the ToolCall
                # branch rather than to every streamed event.
                await asyncio.sleep(4.1)

        if self.langfuse:
            self.instrumentor.flush()

        try:
            result = await handler
            content = result.response.content
            return re.sub(r'^.*?FINAL ANSWER:', '', content, flags=re.DOTALL).strip()
        except Exception:
            # Exception (not a bare except) so that task cancellation and
            # interpreter shutdown still propagate to the caller.
            logger.exception("Could not determine an agent response")
            return "Error occured. No valid agent response could be determined."


def file_exists(task_id: Optional[str]) -> bool:
    """Return True when ``_get_file`` can fetch the file for ``task_id``.

    Args:
        task_id: Id handed to ``_get_file``; ``None`` means no file.

    Returns:
        False if ``task_id`` is None or ``_get_file`` raises, True otherwise.
    """
    if task_id is None:
        return False
    try:
        _get_file(task_id)
    except Exception:
        return False
    return True