Martin Bär
committed on
Commit
·
9f1ce17
1
Parent(s):
ef24142
Use Gemini's Vision Capabilities directly instead of dedicated image tool
Browse files
- app.py +4 -1
- basic_agent.py +19 -5
app.py
CHANGED
@@ -64,13 +64,16 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
64 |
answers_payload = []
|
65 |
print(f"Running agent on {len(questions_data)} questions...")
|
66 |
for item in questions_data:
|
|
|
|
|
|
|
67 |
task_id = item.get("task_id")
|
68 |
question_text = item.get("question")
|
69 |
if not task_id or question_text is None:
|
70 |
print(f"Skipping item with missing task_id or question: {item}")
|
71 |
continue
|
72 |
try:
|
73 |
-
submitted_answer = await agent(question_text, task_id)
|
74 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
75 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
76 |
except Exception as e:
|
|
|
64 |
answers_payload = []
|
65 |
print(f"Running agent on {len(questions_data)} questions...")
|
66 |
for item in questions_data:
|
67 |
+
file_type = item.get("file_name")
|
68 |
+
if file_type:
|
69 |
+
file_type = file_type.split(".")[-1]
|
70 |
task_id = item.get("task_id")
|
71 |
question_text = item.get("question")
|
72 |
if not task_id or question_text is None:
|
73 |
print(f"Skipping item with missing task_id or question: {item}")
|
74 |
continue
|
75 |
try:
|
76 |
+
submitted_answer = await agent(question_text, task_id, file_type=file_type)
|
77 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
78 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
79 |
except Exception as e:
|
basic_agent.py
CHANGED
@@ -9,6 +9,7 @@ from llama_index.tools.wikipedia import WikipediaToolSpec
|
|
9 |
from langfuse.llama_index import LlamaIndexInstrumentor
|
10 |
from llama_index.llms.ollama import Ollama
|
11 |
from llama_index.llms.google_genai import GoogleGenAI
|
|
|
12 |
from llama_index.core.agent.workflow import FunctionAgent, AgentWorkflow
|
13 |
from llama_index.core.agent.workflow import (
|
14 |
AgentOutput,
|
@@ -17,7 +18,8 @@ from llama_index.core.agent.workflow import (
|
|
17 |
)
|
18 |
|
19 |
from multimodality_tools import get_image_qa_tool, get_transcription_tool, \
|
20 |
-
get_excel_analysis_tool, get_excel_tool, get_csv_analysis_tool, get_csv_tool, _get_file,
|
|
|
21 |
from web_tools import get_search_web_tool
|
22 |
from wiki_tool import CustomWikipediaToolSpec
|
23 |
|
@@ -140,13 +142,17 @@ class BasicAgent:
|
|
140 |
# Main AgentWorkflow
|
141 |
self.agent = AgentWorkflow(
|
142 |
agents=[main_agent, wiki_agent, web_search_agent,
|
143 |
-
audio_agent,
|
144 |
root_agent=main_agent.name,
|
145 |
)
|
146 |
|
147 |
-
async def __call__(self, question: str, task_id: str = None) -> str:
|
|
|
148 |
file_str = ""
|
149 |
-
|
|
|
|
|
|
|
150 |
file_str = f'\nIf you need to load a file, do so by providing the id "{task_id}".'
|
151 |
|
152 |
final_answer = (
|
@@ -155,6 +161,14 @@ class BasicAgent:
|
|
155 |
)
|
156 |
|
157 |
msg = f"{question}{file_str}\n{final_answer}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
# Stream events
|
160 |
handler = self.agent.run(user_msg=msg)
|
@@ -206,4 +220,4 @@ def file_exists(task_id: str) -> bool:
|
|
206 |
except:
|
207 |
return False
|
208 |
del file
|
209 |
-
return True
|
|
|
9 |
from langfuse.llama_index import LlamaIndexInstrumentor
|
10 |
from llama_index.llms.ollama import Ollama
|
11 |
from llama_index.llms.google_genai import GoogleGenAI
|
12 |
+
from llama_index.core.llms import ChatMessage, TextBlock, ImageBlock, MessageRole
|
13 |
from llama_index.core.agent.workflow import FunctionAgent, AgentWorkflow
|
14 |
from llama_index.core.agent.workflow import (
|
15 |
AgentOutput,
|
|
|
18 |
)
|
19 |
|
20 |
from multimodality_tools import get_image_qa_tool, get_transcription_tool, \
|
21 |
+
get_excel_analysis_tool, get_excel_tool, get_csv_analysis_tool, get_csv_tool, _get_file, \
|
22 |
+
get_read_file_tool, _build_file_url
|
23 |
from web_tools import get_search_web_tool
|
24 |
from wiki_tool import CustomWikipediaToolSpec
|
25 |
|
|
|
142 |
# Main AgentWorkflow
|
143 |
self.agent = AgentWorkflow(
|
144 |
agents=[main_agent, wiki_agent, web_search_agent,
|
145 |
+
audio_agent, stats_agent], # Exclude image agent for now because we are using a multimodal model.
|
146 |
root_agent=main_agent.name,
|
147 |
)
|
148 |
|
149 |
+
async def __call__(self, question: str, task_id: str = None, file_type: str = None) -> str:
|
150 |
+
# Prepare Input
|
151 |
file_str = ""
|
152 |
+
img_block = None
|
153 |
+
if file_type in ["png", "jpg", "jpeg"]:
|
154 |
+
img_block = ImageBlock(url=_build_file_url(task_id))
|
155 |
+
elif file_exists(task_id):
|
156 |
file_str = f'\nIf you need to load a file, do so by providing the id "{task_id}".'
|
157 |
|
158 |
final_answer = (
|
|
|
161 |
)
|
162 |
|
163 |
msg = f"{question}{file_str}\n{final_answer}"
|
164 |
+
if img_block:
|
165 |
+
msg = ChatMessage(
|
166 |
+
role=MessageRole.USER,
|
167 |
+
blocks=[
|
168 |
+
TextBlock(text=msg),
|
169 |
+
img_block
|
170 |
+
],
|
171 |
+
)
|
172 |
|
173 |
# Stream events
|
174 |
handler = self.agent.run(user_msg=msg)
|
|
|
220 |
except:
|
221 |
return False
|
222 |
del file
|
223 |
+
return True
|