civerson916 committed on
Commit
9e582d4
·
verified ·
1 Parent(s): e8ee140

Update app.py

Browse files

video_prompt_tool

Files changed (1) hide show
  1. app.py +47 -5
app.py CHANGED
@@ -65,6 +65,45 @@ class ImageDescriptionTool(Tool):
65
 
66
  image_description_tool = ImageDescriptionTool()
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  class AudioDescriptionTool(Tool):
69
  name = "audio_description"
70
  description = "This is a tool that will describe a local audio clip."
@@ -200,7 +239,7 @@ class BasicAgent:
200
  prompt_templates = yaml.safe_load(stream)
201
  self.agent = CodeAgent(
202
  model=model,
203
- tools=[final_answer, search_tool, visit_webpage_tool, image_description_tool, audio_description_tool],
204
  max_steps=20,
205
  verbosity_level=1,
206
  additional_authorized_imports=[
@@ -308,10 +347,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
308
  # python code
309
  # if item.get("task_id") == "f918266a-b3e0-4914-865d-4faa564f1aef":
310
  # submitted_answer = agent(question_text)
311
- # else:
312
- # continue
313
 
314
- submitted_answer = agent(question_text)
 
 
 
 
 
315
 
316
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
317
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
@@ -325,7 +367,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
325
  print("Agent did not produce any answers to submit.")
326
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
327
 
328
- # return "Questions parsed.", pd.DataFrame(results_log)
329
 
330
  # 4. Prepare Submission
331
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
65
 
66
  image_description_tool = ImageDescriptionTool()
67
 
68
class VideoPromptTool(Tool):
    """Agent tool that asks a Gemini model a question about a YouTube video.

    The video is passed to the model by URL together with the text prompt,
    and the model's text reply is returned to the agent.
    """

    name = "video_prompt"
    description = "This is a tool for prompting a YouTube video with questions to understand its content."
    inputs = {
        "youtube_url": {
            "type": "string",
            "description": "URL of the YouTube video to prompt, for example: https://www.youtube.com/watch?v=9hE5-98ZeCg",
        },
        "prompt": {
            "type": "string",
            "description": "A question about the video, for example: Please summarize the video in 3 sentences.",
        },
    }
    output_type = "string"

    def __init__(self):
        """Create the tool and its API client.

        The API key is read from the ``GEMINI_KEY`` environment variable.
        """
        super().__init__()
        self.client = genai.Client(api_key=os.environ.get("GEMINI_KEY"))

    def forward(self, youtube_url: str, prompt: str) -> str:
        """Ask ``prompt`` about the video at ``youtube_url``.

        Args:
            youtube_url: URL of the YouTube video to hand to the model.
            prompt: Question or instruction about the video.

        Returns:
            The model's text answer. On failure, the error message is
            returned as a string (matching ``output_type``) so the calling
            agent can see what went wrong instead of receiving ``False``.
        """
        try:
            # Use the client created in __init__; a bare ``client`` name is
            # not defined in this class and would raise NameError.
            response = self.client.models.generate_content(
                model='models/gemini-2.0-flash',
                contents=types.Content(
                    parts=[
                        types.Part(
                            file_data=types.FileData(file_uri=youtube_url)
                        ),
                        types.Part(text=prompt),
                    ]
                ),
            )
            return response.text
        except Exception as e:
            # Best-effort tool: report the failure rather than crash the agent run.
            error_message = f"Error understanding video: {e}"
            print(error_message)
            return error_message
104
+
105
+ video_prompt_tool = VideoPromptTool()
106
+
107
  class AudioDescriptionTool(Tool):
108
  name = "audio_description"
109
  description = "This is a tool that will describe a local audio clip."
 
239
  prompt_templates = yaml.safe_load(stream)
240
  self.agent = CodeAgent(
241
  model=model,
242
+ tools=[final_answer, search_tool, visit_webpage_tool, image_description_tool, audio_description_tool, video_prompt_tool],
243
  max_steps=20,
244
  verbosity_level=1,
245
  additional_authorized_imports=[
 
347
  # python code
348
  # if item.get("task_id") == "f918266a-b3e0-4914-865d-4faa564f1aef":
349
  # submitted_answer = agent(question_text)
 
 
350
 
351
+ if item.get("task_id") == "a1e91b78-d3d8-4675-bb8d-62741b4b68a6":
352
+ submitted_answer = agent(question_text)
353
+ else:
354
+ continue
355
+
356
+ # submitted_answer = agent(question_text)
357
 
358
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
359
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
367
  print("Agent did not produce any answers to submit.")
368
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
369
 
370
+ return "Questions parsed.", pd.DataFrame(results_log)
371
 
372
  # 4. Prepare Submission
373
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}