Samuel Thomas committed on
Commit
7ab4cd0
·
1 Parent(s): d5fffa5

YouTube correction for download

Browse files
Files changed (1) hide show
  1. tools.py +120 -30
tools.py CHANGED
@@ -1351,14 +1351,11 @@ class WikipediaSearchToolWithFAISS(BaseTool):
1351
  return f"An unexpected error occurred: {str(e)}"
1352
 
1353
 
1354
-
1355
  class EnhancedYoutubeScreenshotQA(BaseTool):
1356
  name: str = "bird_species_screenshot_qa"
1357
  description: str = (
1358
  "Use this tool to calculate the number of bird species on camera at any one time,"
1359
  "Input should be a dict with keys: 'youtube_url', 'question', and optional parameters. "
1360
- #"Optional parameters: 'frame_interval_seconds' (default: 10), 'max_frames' (default: 50), "
1361
- #"'use_scene_detection' (default: True), 'parallel_processing' (default: True). "
1362
  "Example: {'youtube_url': 'https://youtube.com/watch?v=xyz', 'question': 'What animals are visible?'}"
1363
  )
1364
 
@@ -1408,7 +1405,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
1408
  def _initialize_model(self):
1409
  """Initialize BLIP model for VQA with error handling"""
1410
  try:
1411
- #self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1412
  self.device = torch.device("cpu")
1413
  print(f"Using device: {self.device}")
1414
 
@@ -1417,11 +1413,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
1417
  "Salesforce/blip-vqa-base"
1418
  ).to(self.device)
1419
 
1420
- #self.processor_vqa = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
1421
- #self.model_vqa = BlipForQuestionAnswering.from_pretrained(
1422
- # "Salesforce/blip-vqa-capfilt-large"
1423
- #).to(self.device)
1424
-
1425
  print("BLIP VQA model loaded successfully")
1426
  except Exception as e:
1427
  print(f"Error initializing VQA model: {str(e)}")
@@ -1458,7 +1449,7 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
1458
  print(f"Error saving cache: {str(e)}")
1459
 
1460
  def download_youtube_video(self, url: str, video_hash: str, cache_enabled: bool = True) -> Optional[str]:
1461
- """Enhanced YouTube video download with caching"""
1462
  video_dir = '/tmp/video/'
1463
  output_filename = f'{video_hash}.mp4'
1464
  output_path = os.path.join(video_dir, output_filename)
@@ -1469,30 +1460,137 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
1469
  return output_path
1470
 
1471
  # Clean directory
1472
- video_dir = '/tmp/video/'
1473
  self._clean_directory(video_dir)
1474
 
1475
  try:
 
1476
  ydl_opts = {
1477
- 'format': 'bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/best[height<=720][ext=mp4]/best',
 
1478
  'outtmpl': output_path,
1479
- 'quiet': True,
 
1480
  'merge_output_format': 'mp4',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1481
  'postprocessors': [{
1482
  'key': 'FFmpegVideoConvertor',
1483
  'preferedformat': 'mp4',
1484
  }]
1485
  }
1486
 
1487
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
1488
- ydl.download([url])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1489
 
1490
- if os.path.exists(output_path):
1491
- print(f"Video downloaded successfully: {output_path}")
1492
- return output_path
1493
- else:
1494
- print("Download completed but file not found")
1495
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1496
 
1497
  except Exception as e:
1498
  print(f"Error downloading YouTube video: {str(e)}")
@@ -1657,7 +1755,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
1657
  def _answer_question_on_frame(self, frame_path: str, question: str) -> Tuple[str, float]:
1658
  """Answer question on single frame with confidence scoring"""
1659
  try:
1660
- #ipdb.set_trace()
1661
  image = Image.open(frame_path).convert('RGB')
1662
  inputs = self.processor_vqa(image, question, return_tensors="pt").to(self.device)
1663
 
@@ -1929,7 +2026,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
1929
  "note": "No numeric results available for statistical summary"
1930
  }
1931
 
1932
-
1933
  if not answers:
1934
  return {
1935
  "final_answer": "All frame processing failed.",
@@ -1944,7 +2040,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
1944
  # Find most common cluster
1945
  largest_cluster = max(answer_clusters.items(), key=lambda x: len(x[1]))
1946
  most_common_answer = largest_cluster[0]
1947
- cluster_size = len(largest_cluster[1])
1948
 
1949
  # Calculate weighted confidence
1950
  answer_counts = Counter(answers)
@@ -1970,15 +2065,10 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
1970
  "statistical_summary": stats
1971
  }
1972
 
1973
- #def _run(self, query: Dict[str, Any]) -> str:
1974
  def _run(self, youtube_url, question, **kwargs) -> str:
1975
  """Enhanced main execution method"""
1976
- #ipdb.set_trace()
1977
  question = "How many unique bird species are on camera?"
1978
 
1979
- #input_data = query
1980
- #youtube_url = input_data.get("youtube_url")
1981
- #question = input_data.get("question")
1982
  input_data = {
1983
  'youtube_url': youtube_url,
1984
  'question': question
@@ -1996,7 +2086,7 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
1996
  cache_enabled = self._get_config('cache_enabled', True, input_data)
1997
  video_path = self.download_youtube_video(youtube_url, video_hash, cache_enabled)
1998
  if not video_path or not os.path.exists(video_path):
1999
- return "Error: Failed to download the YouTube video."
2000
 
2001
  # Step 2: Smart frame extraction
2002
  print(f"Extracting frames with smart selection...")
 
1351
  return f"An unexpected error occurred: {str(e)}"
1352
 
1353
 
 
1354
  class EnhancedYoutubeScreenshotQA(BaseTool):
1355
  name: str = "bird_species_screenshot_qa"
1356
  description: str = (
1357
  "Use this tool to calculate the number of bird species on camera at any one time,"
1358
  "Input should be a dict with keys: 'youtube_url', 'question', and optional parameters. "
 
 
1359
  "Example: {'youtube_url': 'https://youtube.com/watch?v=xyz', 'question': 'What animals are visible?'}"
1360
  )
1361
 
 
1405
  def _initialize_model(self):
1406
  """Initialize BLIP model for VQA with error handling"""
1407
  try:
 
1408
  self.device = torch.device("cpu")
1409
  print(f"Using device: {self.device}")
1410
 
 
1413
  "Salesforce/blip-vqa-base"
1414
  ).to(self.device)
1415
 
 
 
 
 
 
1416
  print("BLIP VQA model loaded successfully")
1417
  except Exception as e:
1418
  print(f"Error initializing VQA model: {str(e)}")
 
1449
  print(f"Error saving cache: {str(e)}")
1450
 
1451
  def download_youtube_video(self, url: str, video_hash: str, cache_enabled: bool = True) -> Optional[str]:
1452
+ """Enhanced YouTube video download with anti-bot measures"""
1453
  video_dir = '/tmp/video/'
1454
  output_filename = f'{video_hash}.mp4'
1455
  output_path = os.path.join(video_dir, output_filename)
 
1460
  return output_path
1461
 
1462
  # Clean directory
 
1463
  self._clean_directory(video_dir)
1464
 
1465
  try:
1466
+ # Enhanced yt-dlp options with anti-bot measures
1467
  ydl_opts = {
1468
+ # Format selection - prefer lower quality to avoid restrictions
1469
+ 'format': 'best[height<=480][ext=mp4]/best[height<=720][ext=mp4]/best[ext=mp4]/best',
1470
  'outtmpl': output_path,
1471
+ 'quiet': False, # Changed to False for debugging
1472
+ 'no_warnings': False,
1473
  'merge_output_format': 'mp4',
1474
+
1475
+ # Anti-bot headers and user agent
1476
+ 'http_headers': {
1477
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
1478
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1479
+ 'Accept-Language': 'en-us,en;q=0.5',
1480
+ 'Accept-Encoding': 'gzip,deflate',
1481
+ 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1482
+ 'Connection': 'keep-alive',
1483
+ 'Upgrade-Insecure-Requests': '1',
1484
+ },
1485
+
1486
+ # Additional anti-detection measures
1487
+ 'extractor_args': {
1488
+ 'youtube': {
1489
+ 'skip': ['hls', 'dash'], # Skip some formats that might trigger detection
1490
+ 'player_skip': ['js'], # Skip JavaScript player
1491
+ }
1492
+ },
1493
+
1494
+ # Rate limiting
1495
+ 'sleep_interval': 1,
1496
+ 'max_sleep_interval': 5,
1497
+ 'sleep_interval_subtitles': 1,
1498
+
1499
+ # Retry settings
1500
+ 'retries': 3,
1501
+ 'fragment_retries': 3,
1502
+ 'skip_unavailable_fragments': True,
1503
+
1504
+ # Cookie handling (you can add browser cookies if needed)
1505
+ # 'cookiefile': '/path/to/cookies.txt', # Uncomment and set path if you have cookies
1506
+
1507
+ # Additional options
1508
+ 'extract_flat': False,
1509
+ 'writesubtitles': False,
1510
+ 'writeautomaticsub': False,
1511
+ 'ignoreerrors': True,
1512
+
1513
+ # Postprocessors
1514
  'postprocessors': [{
1515
  'key': 'FFmpegVideoConvertor',
1516
  'preferedformat': 'mp4',
1517
  }]
1518
  }
1519
 
1520
+ print(f"Attempting to download: {url}")
1521
+
1522
+ # Try multiple download strategies
1523
+ strategies = [
1524
+ # Strategy 1: Standard download
1525
+ ydl_opts,
1526
+
1527
+ # Strategy 2: More conservative approach
1528
+ {
1529
+ **ydl_opts,
1530
+ 'format': 'worst[ext=mp4]/worst', # Try worst quality first
1531
+ 'sleep_interval': 2,
1532
+ 'max_sleep_interval': 10,
1533
+ },
1534
+
1535
+ # Strategy 3: Different user agent
1536
+ {
1537
+ **ydl_opts,
1538
+ 'http_headers': {
1539
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15'
1540
+ },
1541
+ 'format': 'best[height<=360][ext=mp4]/best[ext=mp4]/best',
1542
+ }
1543
+ ]
1544
 
1545
+ last_error = None
1546
+ for i, strategy in enumerate(strategies, 1):
1547
+ try:
1548
+ print(f"Trying download strategy {i}/3...")
1549
+
1550
+ with yt_dlp.YoutubeDL(strategy) as ydl:
1551
+ # Add some delay before download
1552
+ import time
1553
+ time.sleep(2)
1554
+
1555
+ ydl.download([url])
1556
+
1557
+ if os.path.exists(output_path):
1558
+ print(f"Video downloaded successfully with strategy {i}: {output_path}")
1559
+ return output_path
1560
+ else:
1561
+ print(f"Strategy {i} completed but file not found")
1562
+
1563
+ except Exception as e:
1564
+ last_error = e
1565
+ print(f"Strategy {i} failed: {str(e)}")
1566
+ if i < len(strategies):
1567
+ print(f"Trying next strategy...")
1568
+ # Add delay between strategies
1569
+ import time
1570
+ time.sleep(5)
1571
+ continue
1572
+
1573
+ # If all strategies failed, try one more approach with cookies from browser
1574
+ print("All standard strategies failed. Trying with browser cookies...")
1575
+ try:
1576
+ cookie_strategy = {
1577
+ **ydl_opts,
1578
+ 'cookiesfrombrowser': ('chrome',), # Try to get cookies from Chrome
1579
+ 'format': 'worst[ext=mp4]/worst',
1580
+ }
1581
+
1582
+ with yt_dlp.YoutubeDL(cookie_strategy) as ydl:
1583
+ ydl.download([url])
1584
+
1585
+ if os.path.exists(output_path):
1586
+ print(f"Video downloaded successfully with browser cookies: {output_path}")
1587
+ return output_path
1588
+
1589
+ except Exception as e:
1590
+ print(f"Browser cookie strategy also failed: {str(e)}")
1591
+
1592
+ print(f"All download strategies failed. Last error: {last_error}")
1593
+ return None
1594
 
1595
  except Exception as e:
1596
  print(f"Error downloading YouTube video: {str(e)}")
 
1755
  def _answer_question_on_frame(self, frame_path: str, question: str) -> Tuple[str, float]:
1756
  """Answer question on single frame with confidence scoring"""
1757
  try:
 
1758
  image = Image.open(frame_path).convert('RGB')
1759
  inputs = self.processor_vqa(image, question, return_tensors="pt").to(self.device)
1760
 
 
2026
  "note": "No numeric results available for statistical summary"
2027
  }
2028
 
 
2029
  if not answers:
2030
  return {
2031
  "final_answer": "All frame processing failed.",
 
2040
  # Find most common cluster
2041
  largest_cluster = max(answer_clusters.items(), key=lambda x: len(x[1]))
2042
  most_common_answer = largest_cluster[0]
 
2043
 
2044
  # Calculate weighted confidence
2045
  answer_counts = Counter(answers)
 
2065
  "statistical_summary": stats
2066
  }
2067
 
 
2068
  def _run(self, youtube_url, question, **kwargs) -> str:
2069
  """Enhanced main execution method"""
 
2070
  question = "How many unique bird species are on camera?"
2071
 
 
 
 
2072
  input_data = {
2073
  'youtube_url': youtube_url,
2074
  'question': question
 
2086
  cache_enabled = self._get_config('cache_enabled', True, input_data)
2087
  video_path = self.download_youtube_video(youtube_url, video_hash, cache_enabled)
2088
  if not video_path or not os.path.exists(video_path):
2089
+ return "Error: Failed to download the YouTube video. This may be due to YouTube's anti-bot protection. Try using a different video or implement cookie authentication."
2090
 
2091
  # Step 2: Smart frame extraction
2092
  print(f"Extracting frames with smart selection...")