File size: 9,707 Bytes
dbb14b6
 
596707c
dbb14b6
 
b3d4017
 
 
c97999e
eaf9631
09a402e
dbb14b6
 
 
 
 
 
 
6fec0c8
 
c89b357
ba66f78
b3d4017
6fec0c8
09a402e
dbb14b6
 
09a402e
 
 
 
 
 
 
 
b3d4017
dbb14b6
b3d4017
dbb14b6
 
2640254
dbb14b6
2fa94b3
dbb14b6
 
2fa94b3
 
 
 
 
 
 
 
dbb14b6
 
c89b357
dbb14b6
c97999e
 
353745d
dbb14b6
 
 
 
 
7735763
dbb14b6
57aba38
dbb14b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57aba38
dbb14b6
 
 
 
 
 
 
 
 
 
d9145d8
dbb14b6
57aba38
dbb14b6
 
 
 
 
 
 
 
 
 
d9145d8
dbb14b6
 
d9145d8
 
 
dbb14b6
 
 
 
 
 
 
f31b7de
d9145d8
dbb14b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3d4017
6fec0c8
 
20816f3
6fec0c8
 
2640254
 
 
 
 
 
dbb14b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09a402e
df341b9
 
596707c
09a402e
 
615b507
 
 
 
 
 
 
df341b9
20816f3
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import asyncio
import os
import re
from typing import Optional

from langfuse.llama_index import LlamaIndexInstrumentor
from llama_index.core.agent.workflow import (
    AgentOutput,
    AgentWorkflow,
    FunctionAgent,
    ToolCall,
    ToolCallResult,
)
from llama_index.core.tools import FunctionTool
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.llms.ollama import Ollama
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.tools.wikipedia import WikipediaToolSpec
from tavily import AsyncTavilyClient

from multimodality_tools import (
    _get_file,
    get_csv_analysis_tool,
    get_csv_tool,
    get_excel_analysis_tool,
    get_excel_tool,
    get_image_qa_tool,
    get_read_file_tool,
    get_transcription_tool,
)

class BasicAgent:
    """Multi-agent workflow that answers questions in a strict answer format.

    A root ``MainAgent`` delegates work to specialized sub-agents
    (Wikipedia lookup, web search, audio transcription, image QA, and
    Excel/CSV statistics) and compiles the final answer using the
    ``FINAL ANSWER: ...`` template that ``__call__`` extracts for the caller.
    """

    def __init__(self, ollama=False, langfuse=False):
        """Set up the LLM, optional tracing, and the agent workflow.

        Args:
            ollama: When True, use a local Ollama model instead of Google Gemini.
            langfuse: When True, enable Langfuse instrumentation for tracing.
        """
        if not ollama:
            llm = GoogleGenAI(model="gemini-2.0-flash", api_key=os.getenv("GEMINI_API_KEY"))
            # llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen3-32B") #"Qwen/Qwen2.5-Coder-32B-Instruct")
        else:
            llm = Ollama(model="mistral:latest", request_timeout=120.0)

        # Langfuse tracing is optional; the instrumentor is flushed in __call__.
        self.langfuse = langfuse
        if self.langfuse:
            self.instrumentor = LlamaIndexInstrumentor()
            self.instrumentor.start()

        # Initialize sub-agents

        main_agent = FunctionAgent(
            name="MainAgent",
            description=("Can organize and delegate work to different agents and can compile a final answer to a question from other agents' outputs."),
            system_prompt=(
                "You are a general AI assistant. I will ask you a question. "
                # NOTE: each fragment below ends with a space so the
                # concatenated prompt does not fuse words together.
                "Report your thoughts, delegate work to other agents if necessary, and "
                "finish your answer with the following template: "
                "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number "
                "OR as few words as possible OR a comma separated list of numbers and/or "
                "strings. If you are asked for a number, don't use comma to write your "
                "number neither use units such as $ or percent sign unless specified otherwise. "
                "If you are asked for a string, don't use articles, neither abbreviations (e.g. "
                "for cities), and write the digits in plain text unless specified otherwise. If "
                "you are asked for a comma separated list, apply the above rules depending of "
                "whether the element to be put in the list is a number or a string."
            ),
            llm=llm,
            tools=[get_read_file_tool()],
            can_handoff_to=["WikiAgent", "WebAgent", "StatsAgent", "AudioAgent", "ImageAgent"],
        )

        # TODO Wikipedia tool does not return the tables from the page...
        wiki_spec = WikipediaToolSpec()
        # Index 1 is the search tool of the spec's tool list.
        wiki_search_tool = wiki_spec.to_tool_list()[1]

        wiki_agent = FunctionAgent(
            name="WikiAgent",
            description="Agent that can access Wikipedia to answer a question. Try using this agent if the WebAgent does not find an answer to a question.",
            system_prompt=(
                "You are a Wikipedia agent that can search Wikipedia for information and extract the relevant information to answer a question. "
                "You only give concise answers and if you don't find an answer to the given query on Wikipedia, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[wiki_search_tool],
            can_handoff_to=["MainAgent"],
        )

        tool_spec = DuckDuckGoSearchToolSpec()
        search_tool = FunctionTool.from_defaults(tool_spec.duckduckgo_full_search)

        # In case DuckDuckGo is not good enough
        async def search_web(query: str) -> str:
            """Searches the web to answer questions."""
            client = AsyncTavilyClient(api_key=os.getenv("TAVILY"))
            return str(await client.search(query))

        web_search_agent = FunctionAgent(
            name="WebAgent",
            description="Uses the web to answer a question.",
            system_prompt=(
                "You are a Web agent that can search the Web and extract the relevant information to answer a question. "
                "You only give concise answers and if you don't find an answer to the given query with your tool, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[search_web],
            can_handoff_to=["MainAgent"],
        )

        audio_agent = FunctionAgent(
            name="AudioAgent",
            description="Uses transcription tools to analyze audio files. This agent needs a file id and an optional question as input",
            system_prompt=(
                "You are an audio agent that can transcribe an audio file identified by its id and answer questions about the transcript. "
                "You only give concise answers and if you cannot answer the given query using your tool, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[get_transcription_tool()],
            can_handoff_to=["MainAgent"],
        )

        image_agent = FunctionAgent(
            name="ImageAgent",
            description="Can respond to questions involving image understanding. This agent needs a file id and a question as an input.",
            system_prompt=(
                "You are an agent that can read images from a file identified by its id and answer questions about it. "
                # Fixed typo ("you response") and the missing space before "If".
                "Give concise answers and only include the relevant information in your response. "
                "If you cannot answer the given query using your tool, you communicate this clearly. "
                "Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[get_image_qa_tool()],
            can_handoff_to=["MainAgent"],
        )

        stats_agent = FunctionAgent(
            name="StatsAgent",
            description="Uses statistical tools to read and analyse excel and csv files. This agent needs a file id and an optional question as an input",
            system_prompt=(
                "You are an agent that can read excel and csv files and run simple statistical analysis on them. "
                "You can use this information or the loaded file to answer questions about it. "
                "You only give concise answers and if you cannot answer the given query using your tool, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[get_csv_analysis_tool(), get_csv_tool(),
                   get_excel_analysis_tool(), get_excel_tool()],
            can_handoff_to=["MainAgent"],
        )

        # Main AgentWorkflow
        self.agent = AgentWorkflow(
            agents=[main_agent, wiki_agent, web_search_agent,
                    audio_agent, image_agent, stats_agent],
            root_agent=main_agent.name,
        )

    async def __call__(self, question: str, task_id: Optional[str] = None) -> str:
        """Run the workflow on *question* and return the extracted final answer.

        Args:
            question: The question to answer.
            task_id: Optional id of an attached file the agents may load.

        Returns:
            The text following ``FINAL ANSWER:`` in the root agent's output,
            or an error message if no valid response could be produced.
        """
        file_str = ""
        # Only advertise the file to the agents when an id was given and the
        # file can actually be retrieved.
        if task_id is not None and file_exists(task_id):
            file_str = f'\nIf you need to load a file, do so by providing the id "{task_id}".'

        final_answer = (
            "Remember to always use the template 'FINAL ANSWER: [YOUR FINAL ANSWER]' for your final output. "
            "Always use as few words as possible for your final answer."
        )

        msg = f"{question}{file_str}\n{final_answer}"

        # Stream events so progress is visible on the console.
        handler = self.agent.run(user_msg=msg)

        current_agent = None
        current_tool_calls = ""
        async for event in handler.stream_events():
            if (
                hasattr(event, "current_agent_name")
                and event.current_agent_name != current_agent
            ):
                current_agent = event.current_agent_name
                print(f"\n{'='*50}")
                print(f"🤖 Agent: {current_agent}")
                print(f"{'='*50}\n")
            elif isinstance(event, AgentOutput):
                if event.response.content:
                    print("📤 Output:", event.response.content)
                if event.tool_calls:
                    print(
                        "🛠️  Planning to use tools:",
                        [call.tool_name for call in event.tool_calls],
                    )
            elif isinstance(event, ToolCallResult):
                print(f"🔧 Tool Result ({event.tool_name}):")
                print(f"  Arguments: {event.tool_kwargs}")
                print(f"  Output: {event.tool_output}")
            elif isinstance(event, ToolCall):
                print(f"🔨 Calling Tool: {event.tool_name}")
                print(f"  With arguments: {event.tool_kwargs}")

            # Avoid ratelimits - 15 requests per minute
            await asyncio.sleep(4.1)

        if self.langfuse:
            self.instrumentor.flush()

        try:
            res = await handler
            res = res.response.content
            # Strip everything up to and including the answer marker.
            res = re.sub(r'^.*?FINAL ANSWER:', '', res, flags=re.DOTALL).strip()
            return res
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # still propagate; any model/parse failure yields a fallback.
            return "Error occurred. No valid agent response could be determined."

def file_exists(task_id: str) -> bool:
    """Return True if a file for *task_id* can be retrieved, False otherwise.

    The retrieved content is discarded; this is purely an existence probe.
    """
    try:
        _get_file(task_id)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
        return False
    return True