Spaces:

developer28
/

Youtubedownloader

Sleeping

App Files Files Community

Youtubedownloader / app.py

Muktibhuyan

Update app.py

26fa230 verified about 1 month ago

raw

history blame

21.7 kB

	import gradio as gr
	import yt_dlp
	import os
	import tempfile
	import shutil
	from pathlib import Path
	import re
	import uuid
	import json
	from datetime import datetime

	# Module-level dictionary. Nothing else in the visible part of this file
	# reads or writes it.
	# NOTE(review): presumably intended for per-session state - confirm it is
	# still needed before relying on it.
	session_data = {}

	class YouTubeDownloader:
	    """Fetch YouTube metadata through yt-dlp and render a text report.

	    Every ``analyze_*``/``detect_*`` method is a keyword heuristic over the
	    title, description, uploader and tags found in the yt-dlp info dict;
	    none of them inspects the audio or video streams themselves.

	    yt-dlp may report any metadata field as ``None``, so all lookups go
	    through the ``_text``/``_tags``/``_number`` helpers which normalise
	    missing values instead of letting ``None`` reach string or numeric
	    operations.
	    """

	    # Upload dates are compared as ``YYYYMMDD`` strings; uploads on or after
	    # this date are labelled "Recent" in the report.
	    RECENT_UPLOAD_CUTOFF = '20240101'

	    # Optional scheme and sub-domain, a YouTube host, an optional path
	    # prefix and finally an 11-character video id.
	    _YOUTUBE_URL_RE = re.compile(
	        r'(https?://)?(www\.|m\.|music\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
	        r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
	    )

	    def __init__(self):
	        """Create the temporary working directory removed by ``cleanup``."""
	        self.download_dir = tempfile.mkdtemp()

	    def cleanup(self):
	        """Clean up temporary directories and files (best effort)."""
	        try:
	            if hasattr(self, 'download_dir') and os.path.exists(self.download_dir):
	                shutil.rmtree(self.download_dir)
	                print(f"✅ Cleaned up temporary directory: {self.download_dir}")
	        except Exception as e:
	            # Deliberately non-fatal: this runs at interpreter exit.
	            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")

	    # ------------------------------------------------------------------
	    # Normalisation helpers
	    # ------------------------------------------------------------------
	    @staticmethod
	    def _text(video_info, key):
	        """Return ``video_info[key]`` as a string; missing/``None`` -> ``''``."""
	        value = video_info.get(key)
	        return '' if value is None else str(value)

	    @staticmethod
	    def _tags(video_info):
	        """Return the tag list as strings; missing/``None`` -> ``[]``."""
	        return [str(tag) for tag in (video_info.get('tags') or [])]

	    @staticmethod
	    def _number(video_info, *keys):
	        """Return the first numeric value found under ``keys``, else ``0``."""
	        for key in keys:
	            value = video_info.get(key)
	            if isinstance(value, (int, float)) and not isinstance(value, bool):
	                return value
	        return 0

	    @staticmethod
	    def _duration_seconds(video_info):
	        """Return the duration as a non-negative int (``0`` when unknown)."""
	        try:
	            return max(int(video_info.get('duration') or 0), 0)
	        except (TypeError, ValueError):
	            return 0

	    @staticmethod
	    def _format_number(num):
	        """Abbreviate a count as e.g. ``1.5K``, ``2.0M`` or ``3.1B``."""
	        if not num:
	            return "0"
	        if num >= 1_000_000_000:
	            return f"{num/1_000_000_000:.1f}B"
	        if num >= 1_000_000:
	            return f"{num/1_000_000:.1f}M"
	        if num >= 1_000:
	            return f"{num/1_000:.1f}K"
	        return str(num)

	    @staticmethod
	    def _classify(text, keyword_map, default):
	        """Return the title-cased key of the first group with a keyword in ``text``.

	        Matching is by substring on purpose: several keywords are stems
	        ("inspire", "motivate", "tutorial"). Groups are tried in dict order.
	        """
	        for label, keywords in keyword_map.items():
	            if any(keyword in text for keyword in keywords):
	                return label.title()
	        return default

	    @staticmethod
	    def _mentions_whole(text, phrase):
	        """Return True when ``phrase`` occurs in ``text`` as whole word(s)."""
	        pattern = r'(?<!\w)' + re.escape(phrase) + r'(?!\w)'
	        return re.search(pattern, text) is not None

	    # ------------------------------------------------------------------
	    # Public API
	    # ------------------------------------------------------------------
	    def is_valid_youtube_url(self, url):
	        """Return True when ``url`` looks like a YouTube video URL."""
	        if not isinstance(url, str):
	            return False
	        return self._YOUTUBE_URL_RE.match(url.strip()) is not None

	    def analyze_content_type(self, video_info):
	        """Analyze video content to determine type (e.g. ``"Educational"``).

	        Returns ``"General"`` when no keyword group matches.
	        """
	        content_indicators = {
	            'educational': ['tutorial', 'how to', 'learn', 'guide', 'explained', 'lesson', 'course', 'tips'],
	            'promotional': ['ad', 'promo', 'launch', 'brand', 'sponsored', 'commercial', 'product'],
	            'entertainment': ['funny', 'comedy', 'challenge', 'reaction', 'prank', 'meme', 'fun'],
	            'review': ['review', 'unboxing', 'comparison', 'vs', 'test', 'rating'],
	            'vlog': ['vlog', 'daily', 'routine', 'day in', 'life', 'personal'],
	            'music': ['music', 'song', 'cover', 'remix', 'beats', 'audio'],
	            'news': ['news', 'breaking', 'update', 'report', 'latest', 'current'],
	        }
	        metadata = ' '.join([
	            self._text(video_info, 'title'),
	            self._text(video_info, 'description'),
	            ' '.join(self._tags(video_info)),
	        ]).lower()
	        return self._classify(metadata, content_indicators, "General")

	    def analyze_emotion(self, video_info):
	        """Analyze emotional tone of the video; ``"Neutral"`` when unknown."""
	        emotion_indicators = {
	            'energetic': ['excited', 'amazing', 'incredible', 'wow', 'awesome', 'fantastic', 'energy'],
	            'positive': ['happy', 'love', 'great', 'good', 'wonderful', 'perfect', 'best'],
	            'calm': ['calm', 'peaceful', 'relaxing', 'soothing', 'gentle', 'quiet'],
	            'serious': ['important', 'serious', 'warning', 'critical', 'urgent', 'breaking'],
	            'inspirational': ['inspire', 'motivate', 'change', 'transform', 'achieve', 'success'],
	        }
	        metadata = ' '.join([
	            self._text(video_info, 'title'),
	            self._text(video_info, 'description'),
	        ]).lower()
	        return self._classify(metadata, emotion_indicators, "Neutral")

	    def analyze_music_style(self, video_info):
	        """Analyze background music style from title, description and tags.

	        Returns a title-cased style, ``"Music Content"`` when only generic
	        music words are present, or ``"Minimal/No Music"`` otherwise.
	        """
	        music_styles = {
	            'upbeat': ['upbeat', 'energetic', 'fast', 'dance', 'pop', 'electronic', 'rock'],
	            'calm': ['calm', 'soft', 'soothing', 'ambient', 'peaceful', 'meditation', 'acoustic'],
	            'cinematic': ['cinematic', 'dramatic', 'epic', 'orchestral', 'soundtrack'],
	            'lo-fi': ['lo-fi', 'chill', 'study', 'relaxing beats'],
	            'classical': ['classical', 'piano', 'orchestra', 'symphony'],
	        }
	        metadata = ' '.join([
	            self._text(video_info, 'title'),
	            self._text(video_info, 'description'),
	            ' '.join(self._tags(video_info)),
	        ]).lower()

	        style = self._classify(metadata, music_styles, None)
	        if style is not None:
	            return style

	        # Check if it's likely a music video. Once this test has failed the
	        # word 'music' cannot be in the metadata, so the only remaining
	        # outcome is "Minimal/No Music".
	        if any(word in metadata for word in ['music', 'song', 'audio', 'beats']):
	            return "Music Content"
	        return "Minimal/No Music"

	    def detect_influencers(self, video_info):
	        """Enhanced influencer detection.

	        Known names/aliases are matched as whole words so that, for
	        example, "alia" does not fire on "australia". Generic indicator
	        words are still matched by substring.

	        Returns a string starting with ``"TRUE"`` or ``"FALSE"``.
	        """
	        known_personalities = {
	            # Indian Film Industry
	            "Kartik Aaryan": ["kartik aaryan", "kartik", "aaryan"],
	            "Deepika Padukone": ["deepika padukone", "deepika"],
	            "Alia Bhatt": ["alia bhatt", "alia"],
	            "Ranveer Singh": ["ranveer singh", "ranveer"],
	            "Kiara Advani": ["kiara advani", "kiara"],
	            "Janhvi Kapoor": ["janhvi kapoor", "janhvi"],
	            "Ananya Panday": ["ananya panday", "ananya"],
	            "Salman Khan": ["salman khan", "salman"],
	            "Shahrukh Khan": ["shahrukh khan", "srk", "shah rukh"],
	            "Amitabh Bachchan": ["amitabh bachchan", "amitabh", "big b"],
	            "Katrina Kaif": ["katrina kaif", "katrina"],

	            # Sports Personalities
	            "Virat Kohli": ["virat kohli", "virat"],
	            "MS Dhoni": ["ms dhoni", "dhoni"],
	            "Rohit Sharma": ["rohit sharma", "rohit"],

	            # International Celebrities
	            "Taylor Swift": ["taylor swift", "taylor"],
	            "Kylie Jenner": ["kylie jenner", "kylie"],
	            "Elon Musk": ["elon musk", "elon"],

	            # YouTubers/Content Creators
	            "MrBeast": ["mrbeast", "mr beast"],
	            "PewDiePie": ["pewdiepie", "felix"],
	            "CarryMinati": ["carryminati", "carry", "ajey nagar"],
	            "Ashish Chanchlani": ["ashish chanchlani", "ashish"],
	            "Bhuvan Bam": ["bhuvan bam", "bb ki vines"],
	            "Prajakta Koli": ["prajakta koli", "mostlysane"],

	            # Tech Personalities
	            "Sundar Pichai": ["sundar pichai", "sundar"],

	            # Beauty/Fashion Influencers
	            "James Charles": ["james charles"],
	            "Nikkie Tutorials": ["nikkie tutorials", "nikkietutorials"],
	        }

	        # Combine all searchable text
	        searchable_text = " ".join([
	            self._text(video_info, 'title'),
	            self._text(video_info, 'description'),
	            self._text(video_info, 'uploader'),
	            self._text(video_info, 'channel'),
	            ' '.join(self._tags(video_info)),
	        ]).lower()

	        detected_personalities = [
	            personality
	            for personality, aliases in known_personalities.items()
	            if any(self._mentions_whole(searchable_text, alias) for alias in aliases)
	        ]
	        if detected_personalities:
	            return f"TRUE - Detected: {', '.join(detected_personalities)}"

	        # Additional indicators
	        influencer_indicators = [
	            "influencer", "creator", "brand ambassador", "celebrity", "star",
	            "featured", "guest", "interview", "collaboration", "collab",
	        ]
	        if any(indicator in searchable_text for indicator in influencer_indicators):
	            return "TRUE - Likely influencer/celebrity present (check video for confirmation)"
	        return "FALSE - No known personalities detected"

	    def generate_scene_breakdown(self, video_info):
	        """Generate a templated, timestamped scene-by-scene breakdown.

	        The descriptions come from fixed templates chosen by content type;
	        they are not derived from the video itself. At most
	        ``2 * len(templates)`` segments are produced. When the preferred
	        segment length would need more than that, the segments are widened
	        so that they still span the whole video and the last one ends at
	        the real duration.

	        Returns a list of ``"[m:ss-m:ss]: description"`` strings, or a
	        single explanatory entry when the duration is unknown.
	        """
	        duration = self._duration_seconds(video_info)
	        if not duration:
	            return ["[Duration Unknown]: Unable to generate timestamped breakdown - video duration not available"]

	        title = self._text(video_info, 'title').lower()

	        # Determine preferred segment length based on video duration
	        if duration <= 30:
	            segment_length = 2    # very short videos
	        elif duration <= 60:
	            segment_length = 5    # short videos
	        elif duration <= 300:     # up to 5 minutes
	            segment_length = 10
	        elif duration <= 900:     # up to 15 minutes
	            segment_length = 15
	        else:
	            segment_length = 30   # long videos

	        # Scene templates based on video type
	        scene_templates = {
	            'educational': [
	                "Introduction and topic overview",
	                "Main content explanation with examples",
	                "Detailed demonstration or walkthrough",
	                "Key points summary and tips",
	                "Conclusion and call-to-action",
	            ],
	            'promotional': [
	                "Brand/product introduction",
	                "Key features showcase",
	                "Benefits and advantages highlight",
	                "Social proof or testimonials",
	                "Call-to-action and closing",
	            ],
	            'entertainment': [
	                "Opening hook and introduction",
	                "Main entertainment content",
	                "Peak moment or climax",
	                "Reaction or commentary",
	                "Closing and engagement request",
	            ],
	            'review': [
	                "Product/service introduction",
	                "First impressions and unboxing",
	                "Detailed feature analysis",
	                "Pros and cons discussion",
	                "Final verdict and recommendation",
	            ],
	            'vlog': [
	                "Daily routine introduction",
	                "Activity or event coverage",
	                "Personal commentary and thoughts",
	                "Interaction with others",
	                "Day wrap-up and reflection",
	            ],
	        }
	        video_type = self.analyze_content_type(video_info).lower()
	        templates = scene_templates.get(video_type, [
	            "Opening sequence",
	            "Main content delivery",
	            "Supporting information",
	            "Engagement moment",
	            "Conclusion",
	        ])

	        # ``-(-a // b)`` is integer ceiling division.
	        max_segments = len(templates) * 2
	        if -(-duration // segment_length) > max_segments:
	            segment_length = -(-duration // max_segments)
	        segment_count = -(-duration // segment_length)
	        last = segment_count - 1

	        # Visual/audio cue appended to every scene; depends on the title only.
	        if 'music' in title or 'song' in title:
	            cue = " [Music/audio content]"
	        elif 'tutorial' in title or 'how to' in title:
	            cue = " [Instructional content with visual demonstrations]"
	        else:
	            cue = ""

	        scenes = []
	        for i in range(segment_count):
	            start_time = i * segment_length
	            end_time = duration if i == last else start_time + segment_length - 1

	            # Format timestamps
	            start_formatted = f"{start_time//60}:{start_time%60:02d}"
	            end_formatted = f"{end_time//60}:{end_time%60:02d}"

	            # Spread the templates evenly (rounded) over the segments so the
	            # first segment gets the first template and the last the last.
	            if last:
	                template_index = (2 * i * (len(templates) - 1) + last) // (2 * last)
	            else:
	                template_index = 0
	            base_description = templates[template_index]

	            # Add contextual details
	            if i == 0:
	                scene_text = f"{base_description} - Video begins with title card/intro"
	            elif i == last:
	                scene_text = f"{base_description} - Video concludes with end screen/outro"
	            else:
	                scene_text = f"{base_description} - Continued content delivery"

	            scenes.append(f"[{start_formatted}-{end_formatted}]: {scene_text}{cue}")

	        return scenes

	    def format_video_info(self, video_info):
	        """Render the full analysis report for a yt-dlp info dict.

	        Missing or ``None`` fields fall back to "Unknown"/0 rather than
	        raising. Returns the report as one newline-joined string.
	        """
	        if not video_info:
	            return "❌ No video information available."

	        # Basic information processing
	        duration = self._duration_seconds(video_info)
	        duration_str = f"{duration//3600}:{(duration%3600)//60:02d}:{duration%60:02d}" if duration else "Unknown"

	        upload_date = self._text(video_info, 'upload_date')
	        if len(upload_date) == 8:
	            formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"
	        else:
	            formatted_date = upload_date or "Unknown"

	        # Enhanced analysis
	        scene_descriptions = self.generate_scene_breakdown(video_info)
	        music_style = self.analyze_music_style(video_info)
	        influencer_detection = self.detect_influencers(video_info)
	        video_type = self.analyze_content_type(video_info)
	        emotion = self.analyze_emotion(video_info)

	        # Additional metadata
	        thumbnail_url = self._text(video_info, 'thumbnail')
	        language = self._text(video_info, 'language') or 'Unknown'
	        availability = self._text(video_info, 'availability') or 'public'
	        description = self._text(video_info, 'description')

	        # Categories and tags processing
	        categories = [str(c) for c in (video_info.get('categories') or [])]
	        tags = self._tags(video_info)

	        # Engagement metrics
	        view_count = self._number(video_info, 'view_count')
	        like_count = self._number(video_info, 'like_count')
	        comment_count = self._number(video_info, 'comment_count')
	        # yt-dlp reports subscribers as ``channel_follower_count``; the
	        # legacy key is kept as a fallback.
	        subscribers = self._number(video_info, 'channel_follower_count', 'channel_followers')

	        engagement_rate = (like_count / view_count) * 100 if view_count > 0 else 0

	        heavy_rule = '=' * 60
	        light_rule = '─' * 30

	        # Generate comprehensive report
	        lines = [
	            "🎬 COMPREHENSIVE VIDEO ANALYSIS REPORT",
	            heavy_rule,
	            "",
	            "📋 BASIC INFORMATION",
	            light_rule,
	            f"📹 Title: {self._text(video_info, 'title') or 'Unknown'}",
	            f"📺 Channel: {self._text(video_info, 'channel') or 'Unknown'}",
	            f"👤 Uploader: {self._text(video_info, 'uploader') or 'Unknown'}",
	            f"📅 Upload Date: {formatted_date}",
	            f"⏱️ Duration: {duration_str}",
	            f"🌐 Language: {language}",
	            f"🔓 Availability: {availability.title()}",
	            "",
	            "📊 PERFORMANCE METRICS",
	            light_rule,
	            f"👀 Views: {self._format_number(view_count)}",
	            f"👍 Likes: {self._format_number(like_count)}",
	            f"💬 Comments: {self._format_number(comment_count)}",
	            f"👥 Channel Subscribers: {self._format_number(subscribers)}",
	            f"📈 Engagement Rate: {engagement_rate:.2f}%",
	            "",
	            "🏷️ CONTENT CLASSIFICATION",
	            light_rule,
	            f"📂 Categories: {', '.join(categories) if categories else 'None specified'}",
	            f"🔖 Primary Tags: {', '.join(tags[:8]) if tags else 'None specified'}",
	        ]
	        if len(tags) > 8:
	            more = '...' if len(tags) > 16 else ''
	            lines.append(f"🔖 Additional Tags: {', '.join(tags[8:16])}{more}")

	        lines += [
	            "",
	            "📝 VIDEO DESCRIPTION",
	            light_rule,
	            description[:800] if description else 'No description available',
	        ]
	        if len(description) > 800:
	            lines += [
	                "...",
	                "[Description truncated - Full description available in original video]",
	            ]

	        lines += [
	            "",
	            "🎬 DETAILED SCENE-BY-SCENE BREAKDOWN",
	            '─' * 40,
	        ]
	        lines += scene_descriptions

	        if view_count > 100000:
	            quality = 'High'
	        elif view_count > 10000:
	            quality = 'Medium'
	        else:
	            quality = 'Growing'

	        if engagement_rate > 5:
	            audience = 'High'
	        elif engagement_rate > 1:
	            audience = 'Medium'
	        else:
	            audience = 'Low'

	        if view_count > 1000000 and engagement_rate > 3:
	            viral = 'High'
	        elif view_count > 100000:
	            viral = 'Medium'
	        else:
	            viral = 'Standard'

	        is_recent = bool(upload_date) and upload_date >= self.RECENT_UPLOAD_CUTOFF
	        freshness = 'Recent' if is_recent else 'Older Content'

	        lines += [
	            "",
	            f"🎵 Background Music Style: {music_style}",
	            "",
	            f"👤 Influencer Present: {influencer_detection}",
	            "",
	            f"🎥 Video Type: {video_type}",
	            "",
	            f"🎭 Overall Emotion: {emotion}",
	            "",
	            "📱 TECHNICAL DETAILS",
	            light_rule,
	            f"🔗 Video URL: {self._text(video_info, 'webpage_url') or 'Unknown'}",
	            f"🖼️ Thumbnail: {thumbnail_url if thumbnail_url else 'Not available'}",
	            f"📱 Video ID: {self._text(video_info, 'id') or 'Unknown'}",
	            "",
	            "⚡ QUICK INSIGHTS",
	            light_rule,
	            f"• Content Quality: {quality}",
	            f"• Audience Engagement: {audience}",
	            f"• Viral Potential: {viral}",
	            f"• Content Freshness: {freshness}",
	            "",
	            heavy_rule,
	            f"📊 Analysis completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
	        ]
	        return "\n".join(lines)

	    def get_video_info(self, url, progress=None, cookiefile=None):
	        """Extract video information with enhanced error handling.

	        Args:
	            url: YouTube video URL; surrounding whitespace is ignored.
	            progress: callable ``progress(fraction, desc=...)``. When
	                omitted a ``gr.Progress()`` is created on demand instead of
	                once at class-definition time.
	            cookiefile: optional path to a cookies file handed to yt-dlp.

	        Returns:
	            ``(info, message)`` where ``info`` is the yt-dlp info dict, or
	            ``None`` on any failure, and ``message`` describes the outcome.
	        """
	        if not url or not url.strip():
	            return None, "❌ Please enter a YouTube URL"

	        url = url.strip()
	        if not self.is_valid_youtube_url(url):
	            return None, "❌ Invalid YouTube URL format"

	        if progress is None:
	            progress = gr.Progress()

	        try:
	            progress(0.1, desc="Initializing YouTube extractor...")

	            ydl_opts = {
	                'noplaylist': True,
	                'extract_flat': False,
	                'writesubtitles': False,
	                'writeautomaticsub': False,
	                'ignoreerrors': True,
	            }

	            if cookiefile and os.path.exists(cookiefile):
	                ydl_opts['cookiefile'] = cookiefile
	                progress(0.3, desc="Loading cookies for authentication...")

	            progress(0.5, desc="Extracting video metadata...")

	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                info = ydl.extract_info(url, download=False)

	            # With 'ignoreerrors' enabled yt-dlp reports extraction failures
	            # by returning None instead of raising, so check explicitly.
	            if not info:
	                return None, (
	                    "❌ Could not extract video information "
	                    "(the video may be private, restricted or unavailable)"
	                )

	            progress(0.9, desc="Processing video information...")
	            progress(1.0, desc="✅ Analysis complete!")

	            return info, "✅ Video information extracted successfully"

	        except yt_dlp.DownloadError as e:
	            return None, f"❌ YouTube Download Error: {str(e)}"
	        except Exception as e:
	            # Boundary handler: the UI shows the message instead of crashing.
	            return None, f"❌ Unexpected Error: {str(e)}"

	# Module-level YouTubeDownloader instance. It is used by
	# analyze_with_cookies() and its cleanup() method is registered with
	# atexit in the __main__ block. Constructing it runs
	# YouTubeDownloader.__init__, which creates a temporary directory via
	# tempfile.mkdtemp() as soon as this statement executes.
	downloader = YouTubeDownloader()

	def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
	    """Gradio click handler: analyse ``url`` and return the report text.

	    ``cookies_file`` is the optional uploaded cookies path; it is forwarded
	    to the downloader only when it exists on disk. Every failure is
	    returned as a message string so the UI always has something to show.
	    """
	    try:
	        progress(0.05, desc="Starting analysis...")

	        has_cookies = bool(cookies_file) and os.path.exists(cookies_file)
	        if has_cookies:
	            progress(0.1, desc="Cookies file loaded successfully")

	        video_info, status = downloader.get_video_info(
	            url,
	            progress=progress,
	            cookiefile=cookies_file if has_cookies else None,
	        )

	        # Guard clause: nothing was extracted, report why.
	        if not video_info:
	            return f"❌ Analysis Failed: {status}"

	        progress(0.95, desc="Generating comprehensive report...")
	        report_text = downloader.format_video_info(video_info)
	        progress(1.0, desc="✅ Complete!")
	        return report_text

	    except Exception as error:
	        return f"❌ System Error: {error}"

	def create_interface():
	    """Build the Gradio Blocks UI and return it without launching it.

	    Layout: header HTML, a row holding the URL textbox and the optional
	    cookies upload, the analyze button, the report textbox and a set of
	    example URLs. The button is wired to ``analyze_with_cookies``.
	    """
	    custom_css = """
	.gradio-container {
	max-width: 1200px !important;
	}
	.main-header {
	text-align: center;
	background: linear-gradient(90deg, #ff6b6b, #4ecdc4);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-size: 2.5em;
	font-weight: bold;
	margin-bottom: 20px;
	}
	.description-text {
	text-align: center;
	font-size: 1.1em;
	color: #666;
	margin-bottom: 30px;
	}
	"""

	    header_html = """
	<div class="main-header">
	🎥 YouTube Video Analyzer Pro
	</div>
	<div class="description-text">
	Get comprehensive analysis of any YouTube video with detailed scene breakdowns,
	influencer detection, emotion analysis, and performance metrics.
	Upload cookies.txt to access age-restricted or private videos.
	</div>
	"""

	    example_urls = [
	        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
	        ["https://youtu.be/jNQXAC9IVRw"],
	    ]

	    with gr.Blocks(
	        theme=gr.themes.Soft(),
	        title="🎥 YouTube Video Analyzer Pro",
	        css=custom_css,
	    ) as interface:

	        gr.HTML(header_html)

	        # Inputs: wide URL box next to a narrower cookies upload.
	        with gr.Row():
	            with gr.Column(scale=2):
	                url_input = gr.Textbox(
	                    label="🔗 YouTube URL",
	                    placeholder="Paste your YouTube video URL here...",
	                    lines=1,
	                )

	            with gr.Column(scale=1):
	                cookies_input = gr.File(
	                    label="🍪 Upload cookies.txt (Optional)",
	                    file_types=[".txt"],
	                    type="filepath",
	                )

	        analyze_btn = gr.Button("🔍 Analyze Video", variant="primary", size="lg")

	        output = gr.Textbox(
	            label="📊 Comprehensive Analysis Report",
	            lines=35,
	            max_lines=50,
	            show_copy_button=True,
	        )

	        # Clicking the button runs the analysis and fills the report box.
	        analyze_btn.click(
	            fn=analyze_with_cookies,
	            inputs=[url_input, cookies_input],
	            outputs=output,
	            show_progress=True,
	        )

	        # Add examples
	        gr.Examples(
	            examples=example_urls,
	            inputs=url_input,
	            label="🎯 Try these examples:",
	        )

	    return interface

	if __name__ == "__main__":
	    import atexit

	    demo = create_interface()

	    # Remove the downloader's temporary directory when the process exits.
	    atexit.register(downloader.cleanup)

	    # Listen on all interfaces on port 7860, without a public share link.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	        "show_error": True,
	    }
	    demo.launch(**launch_options)