Yago Bolivar committed on
Commit
baa65ee
·
1 Parent(s): afa93b5

feat: enhance YouTube video processing with improved error handling and logging

Browse files
Files changed (1) hide show
  1. src/video_processing_tool.py +85 -14
src/video_processing_tool.py CHANGED
@@ -8,14 +8,20 @@ import re
8
  import shutil
9
  import time
10
  from smolagents.tools import Tool
 
 
 
 
 
11
 
12
  class VideoProcessingTool(Tool):
13
  """
14
  Analyzes video content, extracting information such as frames, audio, or metadata.
15
  Useful for tasks like video summarization, frame extraction, transcript analysis, or content analysis.
 
16
  """
17
  name = "video_processor"
18
- description = "Analyzes video content from a file path or YouTube URL. Can extract frames, detect objects, get transcripts, and provide video metadata."
19
  inputs = {
20
  "file_path": {"type": "string", "description": "Path to the video file or YouTube URL.", "nullable": True},
21
  "task": {"type": "string", "description": "Specific task to perform (e.g., 'extract_frames', 'get_transcript', 'detect_objects', 'get_metadata').", "nullable": True},
@@ -75,24 +81,80 @@ class VideoProcessingTool(Tool):
75
  if task_parameters is None:
76
  task_parameters = {}
77
 
 
78
  is_youtube_url = file_path and ("youtube.com/" in file_path or "youtu.be/" in file_path)
79
  video_source_path = file_path
80
 
 
81
  if is_youtube_url:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  download_resolution = task_parameters.get("resolution", "360p")
83
  download_result = self.download_video(file_path, resolution=download_resolution)
 
84
  if download_result.get("error"):
 
 
 
 
 
 
 
 
 
 
85
  return download_result
 
86
  video_source_path = download_result.get("file_path")
87
  if not video_source_path or not os.path.exists(video_source_path):
88
- return {"error": f"Failed to download or locate video from URL: {file_path}"}
89
 
90
  elif file_path and not os.path.exists(file_path):
91
  return {"error": f"Video file not found: {file_path}"}
92
  elif not file_path and task not in ['get_transcript']: # transcript can work with URL directly
93
- return {"error": "File path is required for this task."}
94
-
95
 
 
96
  if task == "get_metadata":
97
  return self.get_video_metadata(video_source_path)
98
  elif task == "extract_frames":
@@ -108,7 +170,6 @@ class VideoProcessingTool(Tool):
108
  confidence_threshold = task_parameters.get("confidence_threshold", 0.5)
109
  frames_to_process = task_parameters.get("frames_to_process", 5) # Process N frames
110
  return self.detect_objects_in_video(video_source_path, confidence_threshold=confidence_threshold, num_frames_to_sample=frames_to_process)
111
- # Add more tasks as needed, e.g., extract_audio
112
  else:
113
  return {"error": f"Unsupported task: {task}"}
114
 
@@ -120,7 +181,7 @@ class VideoProcessingTool(Tool):
120
  return None
121
 
122
  def download_video(self, youtube_url, resolution="360p"):
123
- """Download YouTube video for processing."""
124
  video_id = self._extract_video_id(youtube_url)
125
  if not video_id:
126
  return {"error": "Invalid YouTube URL or could not extract video ID."}
@@ -132,6 +193,7 @@ class VideoProcessingTool(Tool):
132
  return {"success": True, "file_path": output_file_path, "message": "Video already downloaded."}
133
 
134
  try:
 
135
  ydl_opts = {
136
  'format': f'bestvideo[height<={resolution[:-1]}][ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
137
  'outtmpl': output_file_path,
@@ -139,11 +201,15 @@ class VideoProcessingTool(Tool):
139
  'quiet': True,
140
  'no_warnings': True,
141
  }
 
 
 
142
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
143
  ydl.download([youtube_url])
144
 
145
  if not os.path.exists(output_file_path): # Check if download actually created the file
146
- # Fallback for some formats if mp4 direct is not available
 
147
  ydl_opts['format'] = f'best[height<={resolution[:-1]}]' # more generic
148
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
149
  info_dict = ydl.extract_info(youtube_url, download=True)
@@ -152,12 +218,10 @@ class VideoProcessingTool(Tool):
152
  if downloaded_files:
153
  actual_file_path = os.path.join(self.temp_dir, downloaded_files[0])
154
  if actual_file_path != output_file_path and actual_file_path.endswith(('.mkv', '.webm', '.flv')):
155
- # Minimal conversion to mp4 if needed, or just use the downloaded format if cv2 supports it
156
- # For simplicity, we'll assume cv2 can handle common formats or user ensures mp4 compatible download
157
- output_file_path = actual_file_path # Use the actual downloaded file
158
- elif not actual_file_path.endswith('.mp4'): # if it's not mp4 and not handled above
159
- return {"error": f"Downloaded video is not in a directly usable format: {downloaded_files[0]}"}
160
-
161
 
162
  if os.path.exists(output_file_path):
163
  return {"success": True, "file_path": output_file_path}
@@ -165,7 +229,14 @@ class VideoProcessingTool(Tool):
165
  return {"error": "Video download failed, file not found after attempt."}
166
 
167
  except yt_dlp.utils.DownloadError as e:
168
- return {"error": f"yt-dlp download error: {str(e)}"}
 
 
 
 
 
 
 
169
  except Exception as e:
170
  return {"error": f"Failed to download video: {str(e)}"}
171
 
 
8
  import shutil
9
  import time
10
  from smolagents.tools import Tool
11
+ import logging
12
+
13
+ # Set up logging
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
 
17
  class VideoProcessingTool(Tool):
18
  """
19
  Analyzes video content, extracting information such as frames, audio, or metadata.
20
  Useful for tasks like video summarization, frame extraction, transcript analysis, or content analysis.
21
+ Has limitations with YouTube content due to platform restrictions.
22
  """
23
  name = "video_processor"
24
+ description = "Analyzes video content from a file path or YouTube URL. Can extract frames, detect objects, get transcripts, and provide video metadata. Note: Has limitations with YouTube content due to platform restrictions."
25
  inputs = {
26
  "file_path": {"type": "string", "description": "Path to the video file or YouTube URL.", "nullable": True},
27
  "task": {"type": "string", "description": "Specific task to perform (e.g., 'extract_frames', 'get_transcript', 'detect_objects', 'get_metadata').", "nullable": True},
 
81
  if task_parameters is None:
82
  task_parameters = {}
83
 
84
+ # Check for YouTube URL and provide appropriate warnings
85
  is_youtube_url = file_path and ("youtube.com/" in file_path or "youtu.be/" in file_path)
86
  video_source_path = file_path
87
 
88
+ # Special case for YouTube - check for likely restrictions before attempting download
89
  if is_youtube_url:
90
+ # For transcript tasks, try direct API first without downloading
91
+ if task == "get_transcript":
92
+ transcript_result = self.get_youtube_transcript(file_path)
93
+ if not transcript_result.get("error"):
94
+ return transcript_result
95
+
96
+ # If transcript API fails with certain errors, provide more helpful response
97
+ error_msg = transcript_result.get("error", "")
98
+ if "Transcripts are disabled" in error_msg:
99
+ return {
100
+ "error": "This YouTube video has disabled transcripts. Consider these alternatives:",
101
+ "alternatives": [
102
+ "Please provide a different video with transcripts enabled",
103
+ "Upload a local video file that you have permission to use",
104
+ "Provide a text summary of the video content manually"
105
+ ]
106
+ }
107
+
108
+ # For other tasks that require downloading
109
+ logger.info(f"YouTube URL detected: {file_path}. Attempting to access content...")
110
+
111
+ # Try to get metadata about the video before downloading (title, etc.)
112
+ try:
113
+ with yt_dlp.YoutubeDL({'quiet': True, 'no_warnings': True}) as ydl:
114
+ info = ydl.extract_info(file_path, download=False)
115
+ video_title = info.get('title', 'Unknown')
116
+ logger.info(f"Video title: {video_title}")
117
+ except Exception as e:
118
+ # YouTube is likely blocking access
119
+ error_text = str(e).lower()
120
+ if any(term in error_text for term in ["forbidden", "403", "blocked", "bot", "captcha", "cookie"]):
121
+ return {
122
+ "error": "YouTube access restricted. This agent cannot access this content due to platform restrictions.",
123
+ "alternatives": [
124
+ "Please upload a local video file instead",
125
+ "For transcripts, try providing a text summary manually",
126
+ "For visual analysis, consider uploading screenshots from the video"
127
+ ]
128
+ }
129
+ return {"error": f"Failed to access video info: {str(e)}"}
130
+
131
+ # Proceed with download attempt but with better handling
132
  download_resolution = task_parameters.get("resolution", "360p")
133
  download_result = self.download_video(file_path, resolution=download_resolution)
134
+
135
  if download_result.get("error"):
136
+ error_text = download_result.get("error", "").lower()
137
+ if any(term in error_text for term in ["forbidden", "403", "blocked", "bot", "captcha", "cookie"]):
138
+ return {
139
+ "error": "YouTube download restricted. This agent cannot download this content due to platform restrictions.",
140
+ "alternatives": [
141
+ "Please upload a local video file instead",
142
+ "For transcripts, try obtaining them separately or summarizing manually",
143
+ "For visual analysis, consider uploading key frames as images"
144
+ ]
145
+ }
146
  return download_result
147
+
148
  video_source_path = download_result.get("file_path")
149
  if not video_source_path or not os.path.exists(video_source_path):
150
+ return {"error": f"Failed to download or locate video from URL: {file_path}"}
151
 
152
  elif file_path and not os.path.exists(file_path):
153
  return {"error": f"Video file not found: {file_path}"}
154
  elif not file_path and task not in ['get_transcript']: # transcript can work with URL directly
155
+ return {"error": "File path is required for this task."}
 
156
 
157
+ # Execute the appropriate task based on the request
158
  if task == "get_metadata":
159
  return self.get_video_metadata(video_source_path)
160
  elif task == "extract_frames":
 
170
  confidence_threshold = task_parameters.get("confidence_threshold", 0.5)
171
  frames_to_process = task_parameters.get("frames_to_process", 5) # Process N frames
172
  return self.detect_objects_in_video(video_source_path, confidence_threshold=confidence_threshold, num_frames_to_sample=frames_to_process)
 
173
  else:
174
  return {"error": f"Unsupported task: {task}"}
175
 
 
181
  return None
182
 
183
  def download_video(self, youtube_url, resolution="360p"):
184
+ """Download YouTube video for processing with improved error handling."""
185
  video_id = self._extract_video_id(youtube_url)
186
  if not video_id:
187
  return {"error": "Invalid YouTube URL or could not extract video ID."}
 
193
  return {"success": True, "file_path": output_file_path, "message": "Video already downloaded."}
194
 
195
  try:
196
+ # First try with default options
197
  ydl_opts = {
198
  'format': f'bestvideo[height<={resolution[:-1]}][ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
199
  'outtmpl': output_file_path,
 
201
  'quiet': True,
202
  'no_warnings': True,
203
  }
204
+
205
+ logger.info(f"Attempting to download YouTube video {video_id} at {resolution}...")
206
+
207
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
208
  ydl.download([youtube_url])
209
 
210
  if not os.path.exists(output_file_path): # Check if download actually created the file
211
+ # Fallback for some formats if mp4 direct is not available
212
+ logger.info("Primary download method failed, trying alternative format...")
213
  ydl_opts['format'] = f'best[height<={resolution[:-1]}]' # more generic
214
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
215
  info_dict = ydl.extract_info(youtube_url, download=True)
 
218
  if downloaded_files:
219
  actual_file_path = os.path.join(self.temp_dir, downloaded_files[0])
220
  if actual_file_path != output_file_path and actual_file_path.endswith(('.mkv', '.webm', '.flv')):
221
+ # Use the actual downloaded file
222
+ output_file_path = actual_file_path
223
+ elif not actual_file_path.endswith('.mp4'):
224
+ return {"error": f"Downloaded video is not in a directly usable format: {downloaded_files[0]}"}
 
 
225
 
226
  if os.path.exists(output_file_path):
227
  return {"success": True, "file_path": output_file_path}
 
229
  return {"error": "Video download failed, file not found after attempt."}
230
 
231
  except yt_dlp.utils.DownloadError as e:
232
+ error_msg = str(e)
233
+ if "Sign in to confirm your age" in error_msg:
234
+ return {"error": "Age-restricted video. Cannot download due to platform restrictions."}
235
+ elif "This video is private" in error_msg:
236
+ return {"error": "This video is private and cannot be accessed."}
237
+ elif any(term in error_msg.lower() for term in ["captcha", "bot", "cookie", "forbidden"]):
238
+ return {"error": f"YouTube access restricted due to bot detection. Consider uploading a local video file instead."}
239
+ return {"error": f"yt-dlp download error: {error_msg}"}
240
  except Exception as e:
241
  return {"error": f"Failed to download video: {str(e)}"}
242