HF_Final_Assignment_Template

Sleeping

App Files Files Community

Samuel Thomas committed on Jun 1

Commit

7ab4cd0

1 Parent(s): d5fffa5

ytube correction for download

Browse files

Files changed (1) hide show

tools.py +120 -30

tools.py CHANGED Viewed

@@ -1351,14 +1351,11 @@ class WikipediaSearchToolWithFAISS(BaseTool):
             return f"An unexpected error occurred: {str(e)}"
 class EnhancedYoutubeScreenshotQA(BaseTool):
     name: str = "bird_species_screenshot_qa"
     description: str = (
         "Use this tool to calculate the number of bird species on camera at any one time,"
         "Input should be a dict with keys: 'youtube_url', 'question', and optional parameters. "
-        #"Optional parameters: 'frame_interval_seconds' (default: 10), 'max_frames' (default: 50), "
-        #"'use_scene_detection' (default: True), 'parallel_processing' (default: True). "
         "Example: {'youtube_url': 'https://youtube.com/watch?v=xyz', 'question': 'What animals are visible?'}"
     )
@@ -1408,7 +1405,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
     def _initialize_model(self):
         """Initialize BLIP model for VQA with error handling"""
         try:
-            #self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
             self.device = torch.device("cpu")
             print(f"Using device: {self.device}")
@@ -1417,11 +1413,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
                 "Salesforce/blip-vqa-base"
             ).to(self.device)
-            #self.processor_vqa = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
-            #self.model_vqa = BlipForQuestionAnswering.from_pretrained(
-            #    "Salesforce/blip-vqa-capfilt-large"
-            #).to(self.device)
             print("BLIP VQA model loaded successfully")
         except Exception as e:
             print(f"Error initializing VQA model: {str(e)}")
@@ -1458,7 +1449,7 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
             print(f"Error saving cache: {str(e)}")
     def download_youtube_video(self, url: str, video_hash: str, cache_enabled: bool = True) -> Optional[str]:
-        """Enhanced YouTube video download with caching"""
         video_dir = '/tmp/video/'
         output_filename = f'{video_hash}.mp4'
         output_path = os.path.join(video_dir, output_filename)
@@ -1469,30 +1460,137 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
             return output_path
         # Clean directory
-        video_dir = '/tmp/video/'
         self._clean_directory(video_dir)
         try:
             ydl_opts = {
-                'format': 'bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/best[height<=720][ext=mp4]/best',
                 'outtmpl': output_path,
-                'quiet': True,
                 'merge_output_format': 'mp4',
                 'postprocessors': [{
                     'key': 'FFmpegVideoConvertor',
                     'preferedformat': 'mp4',
                 }]
             }
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                ydl.download([url])
-            if os.path.exists(output_path):
-                print(f"Video downloaded successfully: {output_path}")
-                return output_path
-            else:
-                print("Download completed but file not found")
-                return None
         except Exception as e:
             print(f"Error downloading YouTube video: {str(e)}")
@@ -1657,7 +1755,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
     def _answer_question_on_frame(self, frame_path: str, question: str) -> Tuple[str, float]:
         """Answer question on single frame with confidence scoring"""
         try:
-            #ipdb.set_trace()
             image = Image.open(frame_path).convert('RGB')
             inputs = self.processor_vqa(image, question, return_tensors="pt").to(self.device)
@@ -1929,7 +2026,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
                 "note": "No numeric results available for statistical summary"
             }
         if not answers:
             return {
                 "final_answer": "All frame processing failed.",
@@ -1944,7 +2040,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
         # Find most common cluster
         largest_cluster = max(answer_clusters.items(), key=lambda x: len(x[1]))
         most_common_answer = largest_cluster[0]
-        cluster_size = len(largest_cluster[1])
         # Calculate weighted confidence
         answer_counts = Counter(answers)
@@ -1970,15 +2065,10 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
             "statistical_summary": stats
         }
-    #def _run(self, query: Dict[str, Any]) -> str:
     def _run(self, youtube_url, question, **kwargs) -> str:
         """Enhanced main execution method"""
-        #ipdb.set_trace()
         question = "How many unique bird species are on camera?"
-        #input_data = query
-        #youtube_url = input_data.get("youtube_url")
-        #question = input_data.get("question")
         input_data = {
             'youtube_url': youtube_url,
             'question': question
@@ -1996,7 +2086,7 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
             cache_enabled = self._get_config('cache_enabled', True, input_data)
             video_path = self.download_youtube_video(youtube_url, video_hash, cache_enabled)
             if not video_path or not os.path.exists(video_path):
-                return "Error: Failed to download the YouTube video."
             # Step 2: Smart frame extraction
             print(f"Extracting frames with smart selection...")

             return f"An unexpected error occurred: {str(e)}"
 class EnhancedYoutubeScreenshotQA(BaseTool):
     name: str = "bird_species_screenshot_qa"
     description: str = (
         "Use this tool to calculate the number of bird species on camera at any one time,"
         "Input should be a dict with keys: 'youtube_url', 'question', and optional parameters. "
         "Example: {'youtube_url': 'https://youtube.com/watch?v=xyz', 'question': 'What animals are visible?'}"
     )
     def _initialize_model(self):
         """Initialize BLIP model for VQA with error handling"""
         try:
             self.device = torch.device("cpu")
             print(f"Using device: {self.device}")
                 "Salesforce/blip-vqa-base"
             ).to(self.device)
             print("BLIP VQA model loaded successfully")
         except Exception as e:
             print(f"Error initializing VQA model: {str(e)}")
             print(f"Error saving cache: {str(e)}")
     def download_youtube_video(self, url: str, video_hash: str, cache_enabled: bool = True) -> Optional[str]:
+        """Enhanced YouTube video download with anti-bot measures"""
         video_dir = '/tmp/video/'
         output_filename = f'{video_hash}.mp4'
         output_path = os.path.join(video_dir, output_filename)
             return output_path
         # Clean directory
         self._clean_directory(video_dir)
         try:
+            # Enhanced yt-dlp options with anti-bot measures
             ydl_opts = {
+                # Format selection - prefer lower quality to avoid restrictions
+                'format': 'best[height<=480][ext=mp4]/best[height<=720][ext=mp4]/best[ext=mp4]/best',
                 'outtmpl': output_path,
+                'quiet': False,  # Changed to False for debugging
+                'no_warnings': False,
                 'merge_output_format': 'mp4',
+                # Anti-bot headers and user agent
+                'http_headers': {
+                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+                    'Accept-Language': 'en-us,en;q=0.5',
+                    'Accept-Encoding': 'gzip,deflate',
+                    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+                    'Connection': 'keep-alive',
+                    'Upgrade-Insecure-Requests': '1',
+                },
+                # Additional anti-detection measures
+                'extractor_args': {
+                    'youtube': {
+                        'skip': ['hls', 'dash'],  # Skip some formats that might trigger detection
+                        'player_skip': ['js'],    # Skip JavaScript player
+                    }
+                },
+                # Rate limiting
+                'sleep_interval': 1,
+                'max_sleep_interval': 5,
+                'sleep_interval_subtitles': 1,
+                # Retry settings
+                'retries': 3,
+                'fragment_retries': 3,
+                'skip_unavailable_fragments': True,
+                # Cookie handling (you can add browser cookies if needed)
+                # 'cookiefile': '/path/to/cookies.txt',  # Uncomment and set path if you have cookies
+                # Additional options
+                'extract_flat': False,
+                'writesubtitles': False,
+                'writeautomaticsub': False,
+                'ignoreerrors': True,
+                # Postprocessors
                 'postprocessors': [{
                     'key': 'FFmpegVideoConvertor',
                     'preferedformat': 'mp4',
                 }]
             }
+            print(f"Attempting to download: {url}")
+            # Try multiple download strategies
+            strategies = [
+                # Strategy 1: Standard download
+                ydl_opts,
+                # Strategy 2: More conservative approach
+                {
+                    **ydl_opts,
+                    'format': 'worst[ext=mp4]/worst',  # Try worst quality first
+                    'sleep_interval': 2,
+                    'max_sleep_interval': 10,
+                },
+                # Strategy 3: Different user agent
+                {
+                    **ydl_opts,
+                    'http_headers': {
+                        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15'
+                    },
+                    'format': 'best[height<=360][ext=mp4]/best[ext=mp4]/best',
+                }
+            ]
+            last_error = None
+            for i, strategy in enumerate(strategies, 1):
+                try:
+                    print(f"Trying download strategy {i}/3...")
+                    with yt_dlp.YoutubeDL(strategy) as ydl:
+                        # Add some delay before download
+                        import time
+                        time.sleep(2)
+                        ydl.download([url])
+                    if os.path.exists(output_path):
+                        print(f"Video downloaded successfully with strategy {i}: {output_path}")
+                        return output_path
+                    else:
+                        print(f"Strategy {i} completed but file not found")
+                except Exception as e:
+                    last_error = e
+                    print(f"Strategy {i} failed: {str(e)}")
+                    if i < len(strategies):
+                        print(f"Trying next strategy...")
+                        # Add delay between strategies
+                        import time
+                        time.sleep(5)
+                    continue
+            # If all strategies failed, try one more approach with cookies from browser
+            print("All standard strategies failed. Trying with browser cookies...")
+            try:
+                cookie_strategy = {
+                    **ydl_opts,
+                    'cookiesfrombrowser': ('chrome',),  # Try to get cookies from Chrome
+                    'format': 'worst[ext=mp4]/worst',
+                }
+                with yt_dlp.YoutubeDL(cookie_strategy) as ydl:
+                    ydl.download([url])
+                if os.path.exists(output_path):
+                    print(f"Video downloaded successfully with browser cookies: {output_path}")
+                    return output_path
+            except Exception as e:
+                print(f"Browser cookie strategy also failed: {str(e)}")
+            print(f"All download strategies failed. Last error: {last_error}")
+            return None
         except Exception as e:
             print(f"Error downloading YouTube video: {str(e)}")
     def _answer_question_on_frame(self, frame_path: str, question: str) -> Tuple[str, float]:
         """Answer question on single frame with confidence scoring"""
         try:
             image = Image.open(frame_path).convert('RGB')
             inputs = self.processor_vqa(image, question, return_tensors="pt").to(self.device)
                 "note": "No numeric results available for statistical summary"
             }
         if not answers:
             return {
                 "final_answer": "All frame processing failed.",
         # Find most common cluster
         largest_cluster = max(answer_clusters.items(), key=lambda x: len(x[1]))
         most_common_answer = largest_cluster[0]
         # Calculate weighted confidence
         answer_counts = Counter(answers)
             "statistical_summary": stats
         }
     def _run(self, youtube_url, question, **kwargs) -> str:
         """Enhanced main execution method"""
         question = "How many unique bird species are on camera?"
         input_data = {
             'youtube_url': youtube_url,
             'question': question
             cache_enabled = self._get_config('cache_enabled', True, input_data)
             video_path = self.download_youtube_video(youtube_url, video_hash, cache_enabled)
             if not video_path or not os.path.exists(video_path):
+                return "Error: Failed to download the YouTube video. This may be due to YouTube's anti-bot protection. Try using a different video or implement cookie authentication."
             # Step 2: Smart frame extraction
             print(f"Extracting frames with smart selection...")