civerson916 committed on
Commit
ed82cd0
·
verified ·
1 Parent(s): 5877e1b

Update app.py

Browse files

added image_description_tool

Files changed (1) hide show
  1. app.py +40 -6
app.py CHANGED
@@ -38,6 +38,33 @@ model = LiteLLMModel(
38
  # Import tool from Hub
39
  image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  class AudioDescriptionTool(Tool):
42
  name = "audio_description"
43
  description = "This is a tool that will describe a local audio clip."
@@ -145,7 +172,7 @@ wiki_tool = WikipediaSearchTool()
145
  @tool
146
  def search_tool(arg1:str, arg2:int)-> str: #it's important to specify the return type
147
  #Keep this format for the description / args / args description but feel free to modify the tool
148
- """A tool that provides web search via duckduckgo
149
  Args:
150
  arg1: the first argument
151
  arg2: the second argument
@@ -174,7 +201,7 @@ class BasicAgent:
174
  prompt_templates = yaml.safe_load(stream)
175
  self.agent = CodeAgent(
176
  model=model,
177
- tools=[final_answer, search_tool, visit_webpage_tool, audio_description_tool],
178
  max_steps=20,
179
  verbosity_level=1,
180
  additional_authorized_imports=[
@@ -224,7 +251,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
224
  questions_url = f"{api_url}/questions"
225
  submit_url = f"{api_url}/submit"
226
 
227
- # 1. Instantiate Agent ( modify this part to create your agent)
228
  try:
229
  agent = BasicAgent()
230
  except Exception as e:
@@ -271,9 +298,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
271
  # check if the file_name is not empty
272
  if item.get("file_name"):
273
  # question_text = f"{question_text} Here is the file: https://agents-course-unit4-scoring.hf.space/files/{item.get('task_id')}"
274
- question_text = f"{question_text} Here is the file (the file extension must be added): files/{item.get('task_id')}"
275
-
276
- if item.get("task_id") == "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8":
 
 
 
 
 
 
 
277
  submitted_answer = agent(question_text)
278
  else:
279
  continue
 
38
  # Import tool from Hub
39
  image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
40
 
41
class ImageDescriptionTool(Tool):
    """Agent tool that asks a Gemini model to describe a local image file.

    The image is uploaded through the Gemini Files API and then passed,
    together with a short text prompt, to ``gemini-2.0-flash``. The textual
    description produced by the model is returned to the agent.
    """

    name = "image_description"
    description = "This is a tool that will describe a local image file."
    inputs = {
        "file_name": {
            "type": "string",
            "description": "Complete name of the local file to describe, for example: /files/98c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea8.png",
        }
    }
    output_type = "string"

    def __init__(self):
        """Create the Gemini client using the ``GEMINI_KEY`` environment variable."""
        super().__init__()
        self.client = genai.Client(api_key=os.environ.get("GEMINI_KEY"))

    def forward(self, file_name: str) -> str:
        """Upload ``file_name`` to Gemini and return the model's description.

        Args:
            file_name: Path of the local image file to describe.

        Returns:
            The description text produced by the model. If the upload or the
            generation request fails, the error message is returned instead
            so that the declared ``output_type`` ("string") is always honored
            and the agent can see what went wrong.
        """
        try:
            image_file = self.client.files.upload(file=file_name)
            # Pass the uploaded image handle (the original referenced an
            # undefined ``mp3_file`` here, so every call failed) together with
            # a prompt that actually asks about an image.
            image_description = self.client.models.generate_content(
                model="gemini-2.0-flash",
                contents=["Describe this image", image_file],
            )
            return image_description.text
        except Exception as e:
            # Best-effort tool: report the failure to the agent instead of
            # raising, and keep the log line for debugging.
            error_message = f"Error getting image description: {e}"
            print(error_message)
            return error_message
65
+
66
+ image_description_tool = ImageDescriptionTool()
67
+
68
  class AudioDescriptionTool(Tool):
69
  name = "audio_description"
70
  description = "This is a tool that will describe a local audio clip."
 
172
  @tool
173
  def search_tool(arg1:str, arg2:int)-> str: #it's important to specify the return type
174
  #Keep this format for the description / args / args description but feel free to modify the tool
175
+ """A tool that provides web search via duckduckgo
176
  Args:
177
  arg1: the first argument
178
  arg2: the second argument
 
201
  prompt_templates = yaml.safe_load(stream)
202
  self.agent = CodeAgent(
203
  model=model,
204
+ tools=[final_answer, search_tool, visit_webpage_tool, image_description_tool, audio_description_tool],
205
  max_steps=20,
206
  verbosity_level=1,
207
  additional_authorized_imports=[
 
251
  questions_url = f"{api_url}/questions"
252
  submit_url = f"{api_url}/submit"
253
 
254
+ # 1. Instantiate Agent (modify this part to create your agent)
255
  try:
256
  agent = BasicAgent()
257
  except Exception as e:
 
298
  # check if the file_name is not empty
299
  if item.get("file_name"):
300
  # question_text = f"{question_text} Here is the file: https://agents-course-unit4-scoring.hf.space/files/{item.get('task_id')}"
301
+ question_text = f"{question_text} Here is the file: files/{item.get('file_name')}"
302
+
303
+ # wikipedia
304
+ # if item.get("task_id") == "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8"
305
+
306
+ # chess image
307
+ # if item.get("task_id") == "cca530fc-4052-43b2-b130-b30968d8aa44":
308
+
309
+ # python code
310
+ if item.get("task_id") == "f918266a-b3e0-4914-865d-4faa564f1aef":
311
  submitted_answer = agent(question_text)
312
  else:
313
  continue