Spaces:
Sleeping
Sleeping
Samuel Thomas
committed on
Commit
·
7ab4cd0
1
Parent(s):
d5fffa5
ytube correction for download
Browse files
tools.py
CHANGED
@@ -1351,14 +1351,11 @@ class WikipediaSearchToolWithFAISS(BaseTool):
|
|
1351 |
return f"An unexpected error occurred: {str(e)}"
|
1352 |
|
1353 |
|
1354 |
-
|
1355 |
class EnhancedYoutubeScreenshotQA(BaseTool):
|
1356 |
name: str = "bird_species_screenshot_qa"
|
1357 |
description: str = (
|
1358 |
"Use this tool to calculate the number of bird species on camera at any one time,"
|
1359 |
"Input should be a dict with keys: 'youtube_url', 'question', and optional parameters. "
|
1360 |
-
#"Optional parameters: 'frame_interval_seconds' (default: 10), 'max_frames' (default: 50), "
|
1361 |
-
#"'use_scene_detection' (default: True), 'parallel_processing' (default: True). "
|
1362 |
"Example: {'youtube_url': 'https://youtube.com/watch?v=xyz', 'question': 'What animals are visible?'}"
|
1363 |
)
|
1364 |
|
@@ -1408,7 +1405,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
1408 |
def _initialize_model(self):
|
1409 |
"""Initialize BLIP model for VQA with error handling"""
|
1410 |
try:
|
1411 |
-
#self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
1412 |
self.device = torch.device("cpu")
|
1413 |
print(f"Using device: {self.device}")
|
1414 |
|
@@ -1417,11 +1413,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
1417 |
"Salesforce/blip-vqa-base"
|
1418 |
).to(self.device)
|
1419 |
|
1420 |
-
#self.processor_vqa = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
|
1421 |
-
#self.model_vqa = BlipForQuestionAnswering.from_pretrained(
|
1422 |
-
# "Salesforce/blip-vqa-capfilt-large"
|
1423 |
-
#).to(self.device)
|
1424 |
-
|
1425 |
print("BLIP VQA model loaded successfully")
|
1426 |
except Exception as e:
|
1427 |
print(f"Error initializing VQA model: {str(e)}")
|
@@ -1458,7 +1449,7 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
1458 |
print(f"Error saving cache: {str(e)}")
|
1459 |
|
1460 |
def download_youtube_video(self, url: str, video_hash: str, cache_enabled: bool = True) -> Optional[str]:
|
1461 |
-
"""Enhanced YouTube video download with
|
1462 |
video_dir = '/tmp/video/'
|
1463 |
output_filename = f'{video_hash}.mp4'
|
1464 |
output_path = os.path.join(video_dir, output_filename)
|
@@ -1469,30 +1460,137 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
1469 |
return output_path
|
1470 |
|
1471 |
# Clean directory
|
1472 |
-
video_dir = '/tmp/video/'
|
1473 |
self._clean_directory(video_dir)
|
1474 |
|
1475 |
try:
|
|
|
1476 |
ydl_opts = {
|
1477 |
-
|
|
|
1478 |
'outtmpl': output_path,
|
1479 |
-
'quiet':
|
|
|
1480 |
'merge_output_format': 'mp4',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1481 |
'postprocessors': [{
|
1482 |
'key': 'FFmpegVideoConvertor',
|
1483 |
'preferedformat': 'mp4',
|
1484 |
}]
|
1485 |
}
|
1486 |
|
1487 |
-
|
1488 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1489 |
|
1490 |
-
|
1491 |
-
|
1492 |
-
|
1493 |
-
|
1494 |
-
|
1495 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1496 |
|
1497 |
except Exception as e:
|
1498 |
print(f"Error downloading YouTube video: {str(e)}")
|
@@ -1657,7 +1755,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
1657 |
def _answer_question_on_frame(self, frame_path: str, question: str) -> Tuple[str, float]:
|
1658 |
"""Answer question on single frame with confidence scoring"""
|
1659 |
try:
|
1660 |
-
#ipdb.set_trace()
|
1661 |
image = Image.open(frame_path).convert('RGB')
|
1662 |
inputs = self.processor_vqa(image, question, return_tensors="pt").to(self.device)
|
1663 |
|
@@ -1929,7 +2026,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
1929 |
"note": "No numeric results available for statistical summary"
|
1930 |
}
|
1931 |
|
1932 |
-
|
1933 |
if not answers:
|
1934 |
return {
|
1935 |
"final_answer": "All frame processing failed.",
|
@@ -1944,7 +2040,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
1944 |
# Find most common cluster
|
1945 |
largest_cluster = max(answer_clusters.items(), key=lambda x: len(x[1]))
|
1946 |
most_common_answer = largest_cluster[0]
|
1947 |
-
cluster_size = len(largest_cluster[1])
|
1948 |
|
1949 |
# Calculate weighted confidence
|
1950 |
answer_counts = Counter(answers)
|
@@ -1970,15 +2065,10 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
1970 |
"statistical_summary": stats
|
1971 |
}
|
1972 |
|
1973 |
-
#def _run(self, query: Dict[str, Any]) -> str:
|
1974 |
def _run(self, youtube_url, question, **kwargs) -> str:
|
1975 |
"""Enhanced main execution method"""
|
1976 |
-
#ipdb.set_trace()
|
1977 |
question = "How many unique bird species are on camera?"
|
1978 |
|
1979 |
-
#input_data = query
|
1980 |
-
#youtube_url = input_data.get("youtube_url")
|
1981 |
-
#question = input_data.get("question")
|
1982 |
input_data = {
|
1983 |
'youtube_url': youtube_url,
|
1984 |
'question': question
|
@@ -1996,7 +2086,7 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
1996 |
cache_enabled = self._get_config('cache_enabled', True, input_data)
|
1997 |
video_path = self.download_youtube_video(youtube_url, video_hash, cache_enabled)
|
1998 |
if not video_path or not os.path.exists(video_path):
|
1999 |
-
return "Error: Failed to download the YouTube video."
|
2000 |
|
2001 |
# Step 2: Smart frame extraction
|
2002 |
print(f"Extracting frames with smart selection...")
|
|
|
1351 |
return f"An unexpected error occurred: {str(e)}"
|
1352 |
|
1353 |
|
|
|
1354 |
class EnhancedYoutubeScreenshotQA(BaseTool):
|
1355 |
name: str = "bird_species_screenshot_qa"
|
1356 |
description: str = (
|
1357 |
"Use this tool to calculate the number of bird species on camera at any one time,"
|
1358 |
"Input should be a dict with keys: 'youtube_url', 'question', and optional parameters. "
|
|
|
|
|
1359 |
"Example: {'youtube_url': 'https://youtube.com/watch?v=xyz', 'question': 'What animals are visible?'}"
|
1360 |
)
|
1361 |
|
|
|
1405 |
def _initialize_model(self):
|
1406 |
"""Initialize BLIP model for VQA with error handling"""
|
1407 |
try:
|
|
|
1408 |
self.device = torch.device("cpu")
|
1409 |
print(f"Using device: {self.device}")
|
1410 |
|
|
|
1413 |
"Salesforce/blip-vqa-base"
|
1414 |
).to(self.device)
|
1415 |
|
|
|
|
|
|
|
|
|
|
|
1416 |
print("BLIP VQA model loaded successfully")
|
1417 |
except Exception as e:
|
1418 |
print(f"Error initializing VQA model: {str(e)}")
|
|
|
1449 |
print(f"Error saving cache: {str(e)}")
|
1450 |
|
1451 |
def download_youtube_video(self, url: str, video_hash: str, cache_enabled: bool = True) -> Optional[str]:
|
1452 |
+
"""Enhanced YouTube video download with anti-bot measures"""
|
1453 |
video_dir = '/tmp/video/'
|
1454 |
output_filename = f'{video_hash}.mp4'
|
1455 |
output_path = os.path.join(video_dir, output_filename)
|
|
|
1460 |
return output_path
|
1461 |
|
1462 |
# Clean directory
|
|
|
1463 |
self._clean_directory(video_dir)
|
1464 |
|
1465 |
try:
|
1466 |
+
# Enhanced yt-dlp options with anti-bot measures
|
1467 |
ydl_opts = {
|
1468 |
+
# Format selection - prefer lower quality to avoid restrictions
|
1469 |
+
'format': 'best[height<=480][ext=mp4]/best[height<=720][ext=mp4]/best[ext=mp4]/best',
|
1470 |
'outtmpl': output_path,
|
1471 |
+
'quiet': False, # Changed to False for debugging
|
1472 |
+
'no_warnings': False,
|
1473 |
'merge_output_format': 'mp4',
|
1474 |
+
|
1475 |
+
# Anti-bot headers and user agent
|
1476 |
+
'http_headers': {
|
1477 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
1478 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
1479 |
+
'Accept-Language': 'en-us,en;q=0.5',
|
1480 |
+
'Accept-Encoding': 'gzip,deflate',
|
1481 |
+
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
1482 |
+
'Connection': 'keep-alive',
|
1483 |
+
'Upgrade-Insecure-Requests': '1',
|
1484 |
+
},
|
1485 |
+
|
1486 |
+
# Additional anti-detection measures
|
1487 |
+
'extractor_args': {
|
1488 |
+
'youtube': {
|
1489 |
+
'skip': ['hls', 'dash'], # Skip some formats that might trigger detection
|
1490 |
+
'player_skip': ['js'], # Skip JavaScript player
|
1491 |
+
}
|
1492 |
+
},
|
1493 |
+
|
1494 |
+
# Rate limiting
|
1495 |
+
'sleep_interval': 1,
|
1496 |
+
'max_sleep_interval': 5,
|
1497 |
+
'sleep_interval_subtitles': 1,
|
1498 |
+
|
1499 |
+
# Retry settings
|
1500 |
+
'retries': 3,
|
1501 |
+
'fragment_retries': 3,
|
1502 |
+
'skip_unavailable_fragments': True,
|
1503 |
+
|
1504 |
+
# Cookie handling (you can add browser cookies if needed)
|
1505 |
+
# 'cookiefile': '/path/to/cookies.txt', # Uncomment and set path if you have cookies
|
1506 |
+
|
1507 |
+
# Additional options
|
1508 |
+
'extract_flat': False,
|
1509 |
+
'writesubtitles': False,
|
1510 |
+
'writeautomaticsub': False,
|
1511 |
+
'ignoreerrors': True,
|
1512 |
+
|
1513 |
+
# Postprocessors
|
1514 |
'postprocessors': [{
|
1515 |
'key': 'FFmpegVideoConvertor',
|
1516 |
'preferedformat': 'mp4',
|
1517 |
}]
|
1518 |
}
|
1519 |
|
1520 |
+
print(f"Attempting to download: {url}")
|
1521 |
+
|
1522 |
+
# Try multiple download strategies
|
1523 |
+
strategies = [
|
1524 |
+
# Strategy 1: Standard download
|
1525 |
+
ydl_opts,
|
1526 |
+
|
1527 |
+
# Strategy 2: More conservative approach
|
1528 |
+
{
|
1529 |
+
**ydl_opts,
|
1530 |
+
'format': 'worst[ext=mp4]/worst', # Try worst quality first
|
1531 |
+
'sleep_interval': 2,
|
1532 |
+
'max_sleep_interval': 10,
|
1533 |
+
},
|
1534 |
+
|
1535 |
+
# Strategy 3: Different user agent
|
1536 |
+
{
|
1537 |
+
**ydl_opts,
|
1538 |
+
'http_headers': {
|
1539 |
+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15'
|
1540 |
+
},
|
1541 |
+
'format': 'best[height<=360][ext=mp4]/best[ext=mp4]/best',
|
1542 |
+
}
|
1543 |
+
]
|
1544 |
|
1545 |
+
last_error = None
|
1546 |
+
for i, strategy in enumerate(strategies, 1):
|
1547 |
+
try:
|
1548 |
+
print(f"Trying download strategy {i}/3...")
|
1549 |
+
|
1550 |
+
with yt_dlp.YoutubeDL(strategy) as ydl:
|
1551 |
+
# Add some delay before download
|
1552 |
+
import time
|
1553 |
+
time.sleep(2)
|
1554 |
+
|
1555 |
+
ydl.download([url])
|
1556 |
+
|
1557 |
+
if os.path.exists(output_path):
|
1558 |
+
print(f"Video downloaded successfully with strategy {i}: {output_path}")
|
1559 |
+
return output_path
|
1560 |
+
else:
|
1561 |
+
print(f"Strategy {i} completed but file not found")
|
1562 |
+
|
1563 |
+
except Exception as e:
|
1564 |
+
last_error = e
|
1565 |
+
print(f"Strategy {i} failed: {str(e)}")
|
1566 |
+
if i < len(strategies):
|
1567 |
+
print(f"Trying next strategy...")
|
1568 |
+
# Add delay between strategies
|
1569 |
+
import time
|
1570 |
+
time.sleep(5)
|
1571 |
+
continue
|
1572 |
+
|
1573 |
+
# If all strategies failed, try one more approach with cookies from browser
|
1574 |
+
print("All standard strategies failed. Trying with browser cookies...")
|
1575 |
+
try:
|
1576 |
+
cookie_strategy = {
|
1577 |
+
**ydl_opts,
|
1578 |
+
'cookiesfrombrowser': ('chrome',), # Try to get cookies from Chrome
|
1579 |
+
'format': 'worst[ext=mp4]/worst',
|
1580 |
+
}
|
1581 |
+
|
1582 |
+
with yt_dlp.YoutubeDL(cookie_strategy) as ydl:
|
1583 |
+
ydl.download([url])
|
1584 |
+
|
1585 |
+
if os.path.exists(output_path):
|
1586 |
+
print(f"Video downloaded successfully with browser cookies: {output_path}")
|
1587 |
+
return output_path
|
1588 |
+
|
1589 |
+
except Exception as e:
|
1590 |
+
print(f"Browser cookie strategy also failed: {str(e)}")
|
1591 |
+
|
1592 |
+
print(f"All download strategies failed. Last error: {last_error}")
|
1593 |
+
return None
|
1594 |
|
1595 |
except Exception as e:
|
1596 |
print(f"Error downloading YouTube video: {str(e)}")
|
|
|
1755 |
def _answer_question_on_frame(self, frame_path: str, question: str) -> Tuple[str, float]:
|
1756 |
"""Answer question on single frame with confidence scoring"""
|
1757 |
try:
|
|
|
1758 |
image = Image.open(frame_path).convert('RGB')
|
1759 |
inputs = self.processor_vqa(image, question, return_tensors="pt").to(self.device)
|
1760 |
|
|
|
2026 |
"note": "No numeric results available for statistical summary"
|
2027 |
}
|
2028 |
|
|
|
2029 |
if not answers:
|
2030 |
return {
|
2031 |
"final_answer": "All frame processing failed.",
|
|
|
2040 |
# Find most common cluster
|
2041 |
largest_cluster = max(answer_clusters.items(), key=lambda x: len(x[1]))
|
2042 |
most_common_answer = largest_cluster[0]
|
|
|
2043 |
|
2044 |
# Calculate weighted confidence
|
2045 |
answer_counts = Counter(answers)
|
|
|
2065 |
"statistical_summary": stats
|
2066 |
}
|
2067 |
|
|
|
2068 |
def _run(self, youtube_url, question, **kwargs) -> str:
|
2069 |
"""Enhanced main execution method"""
|
|
|
2070 |
question = "How many unique bird species are on camera?"
|
2071 |
|
|
|
|
|
|
|
2072 |
input_data = {
|
2073 |
'youtube_url': youtube_url,
|
2074 |
'question': question
|
|
|
2086 |
cache_enabled = self._get_config('cache_enabled', True, input_data)
|
2087 |
video_path = self.download_youtube_video(youtube_url, video_hash, cache_enabled)
|
2088 |
if not video_path or not os.path.exists(video_path):
|
2089 |
+
return "Error: Failed to download the YouTube video. This may be due to YouTube's anti-bot protection. Try using a different video or implement cookie authentication."
|
2090 |
|
2091 |
# Step 2: Smart frame extraction
|
2092 |
print(f"Extracting frames with smart selection...")
|