Final_Assignment_Template

Paused

App Files Files Community

civerson916 committed on Apr 29

Commit

9e582d4

verified ·

1 Parent(s): e8ee140

Update app.py

Browse files

video_prompt_tool

Files changed (1) hide show

app.py +47 -5

app.py CHANGED Viewed

@@ -65,6 +65,45 @@ class ImageDescriptionTool(Tool):
 image_description_tool = ImageDescriptionTool()
 class AudioDescriptionTool(Tool):
     name = "audio_description"
     description = "This is a tool that will describe a local audio clip."
@@ -200,7 +239,7 @@ class BasicAgent:
             prompt_templates = yaml.safe_load(stream)
         self.agent = CodeAgent(
             model=model,
-            tools=[final_answer, search_tool, visit_webpage_tool, image_description_tool, audio_description_tool],
             max_steps=20,
             verbosity_level=1,
             additional_authorized_imports=[
@@ -308,10 +347,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             # python code
             # if item.get("task_id") == "f918266a-b3e0-4914-865d-4faa564f1aef":
             #     submitted_answer = agent(question_text)
-            # else:
-            #     continue
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
@@ -325,7 +367,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # return "Questions parsed.", pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}

 image_description_tool = ImageDescriptionTool()
+class VideoPromptTool(Tool):
+    name = "video_prompt"
+    description = "This is a tool for prompting a YouTube video with questions to understand its content."
+    inputs = {
+        "youtube_url": {
+            "type": "string",
+            "description": "URL of the YouTube video to prompt, for example: https://www.youtube.com/watch?v=9hE5-98ZeCg",
+        },
+        "prompt": {
+            "type": "string",
+            "description": "A question about the video, for example: Please summarize the video in 3 sentences.",
+        }
+    }
+    output_type = "string"
+    def __init__(self):
+        super().__init__()
+        self.client = genai.Client(api_key=os.environ.get("GEMINI_KEY"))
+    def forward(self, youtube_url: str, prompt: str):
+        try:
+            video_description = client.models.generate_content(
+                model='models/gemini-2.0-flash',
+                contents=types.Content(
+                    parts=[
+                        types.Part(
+                            file_data=types.FileData(file_uri=youtube_url)
+                        ),
+                        types.Part(text=prompt)
+                    ]
+                )
+            )
+            return video_description.text
+        except Exception as e:
+            print(f"Error understanding video: {e}")
+            return False
+video_prompt_tool = VideoPromptTool()
 class AudioDescriptionTool(Tool):
     name = "audio_description"
     description = "This is a tool that will describe a local audio clip."
             prompt_templates = yaml.safe_load(stream)
         self.agent = CodeAgent(
             model=model,
+            tools=[final_answer, search_tool, visit_webpage_tool, image_description_tool, audio_description_tool, video_prompt_tool],
             max_steps=20,
             verbosity_level=1,
             additional_authorized_imports=[
             # python code
             # if item.get("task_id") == "f918266a-b3e0-4914-865d-4faa564f1aef":
             #     submitted_answer = agent(question_text)
+            if item.get("task_id") == "a1e91b78-d3d8-4675-bb8d-62741b4b68a6":
+                submitted_answer = agent(question_text)
+            else:
+                continue
+            # submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    return "Questions parsed.", pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}