Update app.py
Browse files · added image_description_tool
app.py
CHANGED
@@ -38,6 +38,33 @@ model = LiteLLMModel(
|
|
38 |
# Import tool from Hub
|
39 |
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
class AudioDescriptionTool(Tool):
|
42 |
name = "audio_description"
|
43 |
description = "This is a tool that will describe a local audio clip."
|
@@ -145,7 +172,7 @@ wiki_tool = WikipediaSearchTool()
|
|
145 |
@tool
|
146 |
def search_tool(arg1:str, arg2:int)-> str: #it's important to specify the return type
|
147 |
#Keep this format for the description / args / args description but feel free to modify the tool
|
148 |
-
"""A tool that provides web search via duckduckgo
|
149 |
Args:
|
150 |
arg1: the first argument
|
151 |
arg2: the second argument
|
@@ -174,7 +201,7 @@ class BasicAgent:
|
|
174 |
prompt_templates = yaml.safe_load(stream)
|
175 |
self.agent = CodeAgent(
|
176 |
model=model,
|
177 |
-
tools=[final_answer, search_tool, visit_webpage_tool, audio_description_tool],
|
178 |
max_steps=20,
|
179 |
verbosity_level=1,
|
180 |
additional_authorized_imports=[
|
@@ -224,7 +251,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
224 |
questions_url = f"{api_url}/questions"
|
225 |
submit_url = f"{api_url}/submit"
|
226 |
|
227 |
-
# 1. Instantiate Agent (
|
228 |
try:
|
229 |
agent = BasicAgent()
|
230 |
except Exception as e:
|
@@ -271,9 +298,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
271 |
# check if the file_name is not empty
|
272 |
if item.get("file_name"):
|
273 |
# question_text = f"{question_text} Here is the file: https://agents-course-unit4-scoring.hf.space/files/{item.get('task_id')}"
|
274 |
-
question_text = f"{question_text} Here is the file
|
275 |
-
|
276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
277 |
submitted_answer = agent(question_text)
|
278 |
else:
|
279 |
continue
|
|
|
38 |
# Import tool from Hub
|
39 |
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
|
40 |
|
41 |
+
class ImageDescriptionTool(Tool):
    """Tool that asks a Gemini model to describe a local image file.

    The image is first uploaded with the client's ``files.upload`` call and
    the returned file handle is then sent, together with a short text prompt,
    to the ``gemini-2.0-flash`` model.
    """

    name = "image_description"
    description = "This is a tool that will describe a local image file."
    inputs = {
        "file_name": {
            "type": "string",
            "description": "Complete name of the local file to describe, for example: /files/98c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea8.png",
        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()
        # The API key is read from the GEMINI_KEY environment variable.
        self.client = genai.Client(api_key=os.environ.get("GEMINI_KEY"))

    def forward(self, file_name: str):
        """Upload ``file_name`` and return the model's description of it.

        Args:
            file_name: Path of the local image file to describe.

        Returns:
            The description text produced by the model, or ``False`` when the
            upload or the generation request raises an exception (the error
            is printed).
        """
        try:
            image_file = self.client.files.upload(file=file_name)
            image_description = self.client.models.generate_content(
                model="gemini-2.0-flash",
                # Pass the uploaded image handle; the previous code referenced
                # an undefined `mp3_file` name here, so every call failed with
                # a NameError that the handler below swallowed.
                contents=["Describe this image", image_file],
            )
            return image_description.text
        except Exception as e:
            # Best-effort tool: report the problem and signal failure to the
            # caller instead of aborting the agent run.
            print(f"Error getting image description: {e}")
            return False
|
65 |
+
|
66 |
+
image_description_tool = ImageDescriptionTool()
|
67 |
+
|
68 |
class AudioDescriptionTool(Tool):
|
69 |
name = "audio_description"
|
70 |
description = "This is a tool that will describe a local audio clip."
|
|
|
172 |
@tool
|
173 |
def search_tool(arg1:str, arg2:int)-> str: #it's important to specify the return type
|
174 |
#Keep this format for the description / args / args description but feel free to modify the tool
|
175 |
+
"""A tool that provides web search via duckduckgo
|
176 |
Args:
|
177 |
arg1: the first argument
|
178 |
arg2: the second argument
|
|
|
201 |
prompt_templates = yaml.safe_load(stream)
|
202 |
self.agent = CodeAgent(
|
203 |
model=model,
|
204 |
+
tools=[final_answer, search_tool, visit_webpage_tool, image_description_tool, audio_description_tool],
|
205 |
max_steps=20,
|
206 |
verbosity_level=1,
|
207 |
additional_authorized_imports=[
|
|
|
251 |
questions_url = f"{api_url}/questions"
|
252 |
submit_url = f"{api_url}/submit"
|
253 |
|
254 |
+
# 1. Instantiate Agent (modify this part to create your agent)
|
255 |
try:
|
256 |
agent = BasicAgent()
|
257 |
except Exception as e:
|
|
|
298 |
# check if the file_name is not empty
|
299 |
if item.get("file_name"):
|
300 |
# question_text = f"{question_text} Here is the file: https://agents-course-unit4-scoring.hf.space/files/{item.get('task_id')}"
|
301 |
+
question_text = f"{question_text} Here is the file: files/{item.get('file_name')}"
|
302 |
+
|
303 |
+
# wikipedia
|
304 |
+
# if item.get("task_id") == "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8"
|
305 |
+
|
306 |
+
# chess image
|
307 |
+
# if item.get("task_id") == "cca530fc-4052-43b2-b130-b30968d8aa44":
|
308 |
+
|
309 |
+
# python code
|
310 |
+
if item.get("task_id") == "f918266a-b3e0-4914-865d-4faa564f1aef":
|
311 |
submitted_answer = agent(question_text)
|
312 |
else:
|
313 |
continue
|