Final_Assignment_Template

Paused

App Files Files Community

civerson916 committed on Apr 29

Commit

ed82cd0

verified ·

1 Parent(s): 5877e1b

Update app.py

Browse files

added image_description_tool

Files changed (1) hide show

app.py +40 -6

app.py CHANGED Viewed

@@ -38,6 +38,33 @@ model = LiteLLMModel(
 # Import tool from Hub
 image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
 class AudioDescriptionTool(Tool):
     name = "audio_description"
     description = "This is a tool that will describe a local audio clip."
@@ -145,7 +172,7 @@ wiki_tool = WikipediaSearchTool()
 @tool
 def search_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
     #Keep this format for the description / args / args description but feel free to modify the tool
-    """A tool that provides web search via duckduckgo
     Args:
         arg1: the first argument
         arg2: the second argument
@@ -174,7 +201,7 @@ class BasicAgent:
             prompt_templates = yaml.safe_load(stream)
         self.agent = CodeAgent(
             model=model,
-            tools=[final_answer, search_tool, visit_webpage_tool, audio_description_tool],
             max_steps=20,
             verbosity_level=1,
             additional_authorized_imports=[
@@ -224,7 +251,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
         agent = BasicAgent()
     except Exception as e:
@@ -271,9 +298,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             # check if the file_name is not empty
             if item.get("file_name"):
                 # question_text = f"{question_text} Here is the file: https://agents-course-unit4-scoring.hf.space/files/{item.get('task_id')}"
-                question_text = f"{question_text} Here is the file (the file extension must be added): files/{item.get('task_id')}"
-            if item.get("task_id") == "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8":
                 submitted_answer = agent(question_text)
             else:
                 continue

 # Import tool from Hub
 image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
+class ImageDescriptionTool(Tool):
+    """Tool that asks Gemini for a textual description of a local image file.
+
+    The image is first uploaded through the Gemini files API and the returned
+    file handle is then passed to ``generate_content`` together with a short
+    text prompt.
+    """
+    name = "image_description"
+    description = "This is a tool that will describe a local image file."
+    inputs = {
+        "file_name": {
+            "type": "string",
+            "description": "Complete name of the local file to describe, for example: /files/98c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea8.png",
+        }
+    }
+    output_type = "string"
+    def __init__(self):
+        """Create the Gemini client using the ``GEMINI_KEY`` environment variable."""
+        super().__init__()
+        self.client = genai.Client(api_key=os.environ.get("GEMINI_KEY"))
+    def forward(self, file_name: str) -> str:
+        """Upload ``file_name`` and return the model's description of the image.
+
+        Args:
+            file_name: Path of the local image file to describe.
+
+        Returns:
+            The description text produced by the model, or a message starting
+            with "Error getting image description:" when the upload or the
+            model call fails.
+        """
+        try:
+            image_file = self.client.files.upload(file=f"{file_name}")
+            # Pass the handle that was just uploaded; the previous code
+            # referenced an undefined ``mp3_file`` and used the audio prompt,
+            # so every call failed with a NameError that was swallowed below.
+            image_description = self.client.models.generate_content(
+                model="gemini-2.0-flash", contents=["Describe this image", image_file]
+            )
+            return image_description.text
+        except Exception as e:
+            # Best-effort tool: report the failure instead of raising, but as
+            # a string so the result matches the declared ``output_type``.
+            error_message = f"Error getting image description: {e}"
+            print(error_message)
+            return error_message
+image_description_tool = ImageDescriptionTool()
 class AudioDescriptionTool(Tool):
     name = "audio_description"
     description = "This is a tool that will describe a local audio clip."
 @tool
 def search_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
     #Keep this format for the description / args / args description but feel free to modify the tool
+    """A tool that provides web search via duckduckgo
     Args:
         arg1: the first argument
         arg2: the second argument
             prompt_templates = yaml.safe_load(stream)
         self.agent = CodeAgent(
             model=model,
+            tools=[final_answer, search_tool, visit_webpage_tool, image_description_tool, audio_description_tool],
             max_steps=20,
             verbosity_level=1,
             additional_authorized_imports=[
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent (modify this part to create your agent)
     try:
         agent = BasicAgent()
     except Exception as e:
             # check if the file_name is not empty
             if item.get("file_name"):
                 # question_text = f"{question_text} Here is the file: https://agents-course-unit4-scoring.hf.space/files/{item.get('task_id')}"
+                question_text = f"{question_text} Here is the file: files/{item.get('file_name')}"
+            # wikipedia
+            # if item.get("task_id") == "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8"
+            # chess image
+            # if item.get("task_id") == "cca530fc-4052-43b2-b130-b30968d8aa44":
+            # python code
+            if item.get("task_id") == "f918266a-b3e0-4914-865d-4faa564f1aef":
                 submitted_answer = agent(question_text)
             else:
                 continue