"""Gradio app that builds a text report about a YouTube video.

Metadata is fetched with yt-dlp (no media is downloaded) and then run through
a set of keyword heuristics: content type, emotional tone, music style,
known-personality detection and a templated scene timeline.
"""

import atexit
import json
import os
import re
import shutil
import tempfile
import uuid
from datetime import datetime
from pathlib import Path

import gradio as gr
import yt_dlp

session_data = {}


def _text_field(video_info, key):
    """Return ``video_info[key]`` as a string, mapping a missing/None value to ''.

    yt-dlp reports unknown fields as ``None`` rather than omitting the key, so
    ``dict.get(key, '')`` alone is not enough to guarantee a string.
    """
    return str(video_info.get(key) or '')


def _list_field(video_info, key):
    """Return ``video_info[key]`` as a list of strings ('' entries dropped)."""
    return [str(item) for item in (video_info.get(key) or []) if item]


def _mentions_any(text, terms):
    """Return True if *text* contains any of *terms* as a whole word.

    A trailing plural "s" is tolerated ("review" also matches "reviews").
    Whole-word matching is deliberate: plain substring tests made short
    keywords such as "ad", "vs" or "star" fire on unrelated words
    ("made", "canvas", "start").

    Both *text* and *terms* are expected to be lower-case already.
    """
    if not terms:
        return False
    alternatives = "|".join(re.escape(term) for term in terms)
    pattern = r"(?<!\w)(?:" + alternatives + r")s?(?!\w)"
    return re.search(pattern, text) is not None


class YouTubeDownloader:
    """Fetch YouTube metadata through yt-dlp and turn it into a text report."""

    _YOUTUBE_URL_RE = re.compile(
        r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
        r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
    )

    # NOTE: the lookup tables below are order-sensitive - the first category
    # whose keyword list matches wins.
    _CONTENT_INDICATORS = {
        'educational': ['tutorial', 'how to', 'learn', 'guide', 'explained', 'lesson', 'course', 'tips'],
        'promotional': ['ad', 'promo', 'launch', 'brand', 'sponsored', 'commercial', 'product'],
        'entertainment': ['funny', 'comedy', 'challenge', 'reaction', 'prank', 'meme', 'fun'],
        'review': ['review', 'unboxing', 'comparison', 'vs', 'test', 'rating'],
        'vlog': ['vlog', 'daily', 'routine', 'day in', 'life', 'personal'],
        'music': ['music', 'song', 'cover', 'remix', 'beats', 'audio'],
        'news': ['news', 'breaking', 'update', 'report', 'latest', 'current'],
    }

    _EMOTION_INDICATORS = {
        'energetic': ['excited', 'amazing', 'incredible', 'wow', 'awesome', 'fantastic', 'energy'],
        'positive': ['happy', 'love', 'great', 'good', 'wonderful', 'perfect', 'best'],
        'calm': ['calm', 'peaceful', 'relaxing', 'soothing', 'gentle', 'quiet'],
        'serious': ['important', 'serious', 'warning', 'critical', 'urgent', 'breaking'],
        'inspirational': ['inspire', 'motivate', 'change', 'transform', 'achieve', 'success'],
    }

    _MUSIC_STYLES = {
        'upbeat': ['upbeat', 'energetic', 'fast', 'dance', 'pop', 'electronic', 'rock'],
        'calm': ['calm', 'soft', 'soothing', 'ambient', 'peaceful', 'meditation', 'acoustic'],
        'cinematic': ['cinematic', 'dramatic', 'epic', 'orchestral', 'soundtrack'],
        'lo-fi': ['lo-fi', 'chill', 'study', 'relaxing beats'],
        'classical': ['classical', 'piano', 'orchestra', 'symphony'],
    }

    _KNOWN_PERSONALITIES = {
        # Indian Film Industry
        "Kartik Aaryan": ["kartik aaryan", "kartik", "aaryan"],
        "Deepika Padukone": ["deepika padukone", "deepika"],
        "Alia Bhatt": ["alia bhatt", "alia"],
        "Ranveer Singh": ["ranveer singh", "ranveer"],
        "Kiara Advani": ["kiara advani", "kiara"],
        "Janhvi Kapoor": ["janhvi kapoor", "janhvi"],
        "Ananya Panday": ["ananya panday", "ananya"],
        "Salman Khan": ["salman khan", "salman"],
        "Shahrukh Khan": ["shahrukh khan", "srk", "shah rukh"],
        "Amitabh Bachchan": ["amitabh bachchan", "amitabh", "big b"],
        "Katrina Kaif": ["katrina kaif", "katrina"],
        # Sports Personalities
        "Virat Kohli": ["virat kohli", "virat"],
        "MS Dhoni": ["ms dhoni", "dhoni"],
        "Rohit Sharma": ["rohit sharma", "rohit"],
        # International Celebrities
        "Taylor Swift": ["taylor swift", "taylor"],
        "Kylie Jenner": ["kylie jenner", "kylie"],
        "Elon Musk": ["elon musk", "elon"],
        # YouTubers/Content Creators
        "MrBeast": ["mrbeast", "mr beast"],
        "PewDiePie": ["pewdiepie", "felix"],
        "CarryMinati": ["carryminati", "carry", "ajey nagar"],
        "Ashish Chanchlani": ["ashish chanchlani", "ashish"],
        "Bhuvan Bam": ["bhuvan bam", "bb ki vines"],
        "Prajakta Koli": ["prajakta koli", "mostlysane"],
        # Tech Personalities
        "Sundar Pichai": ["sundar pichai", "sundar"],
        # Beauty/Fashion Influencers
        "James Charles": ["james charles"],
        "Nikkie Tutorials": ["nikkie tutorials", "nikkietutorials"],
    }

    _INFLUENCER_INDICATORS = [
        "influencer", "creator", "brand ambassador", "celebrity", "star",
        "featured", "guest", "interview", "collaboration", "collab",
    ]

    _SCENE_TEMPLATES = {
        'educational': [
            "Introduction and topic overview",
            "Main content explanation with examples",
            "Detailed demonstration or walkthrough",
            "Key points summary and tips",
            "Conclusion and call-to-action",
        ],
        'promotional': [
            "Brand/product introduction",
            "Key features showcase",
            "Benefits and advantages highlight",
            "Social proof or testimonials",
            "Call-to-action and closing",
        ],
        'entertainment': [
            "Opening hook and introduction",
            "Main entertainment content",
            "Peak moment or climax",
            "Reaction or commentary",
            "Closing and engagement request",
        ],
        'review': [
            "Product/service introduction",
            "First impressions and unboxing",
            "Detailed feature analysis",
            "Pros and cons discussion",
            "Final verdict and recommendation",
        ],
        'vlog': [
            "Daily routine introduction",
            "Activity or event coverage",
            "Personal commentary and thoughts",
            "Interaction with others",
            "Day wrap-up and reflection",
        ],
    }

    _DEFAULT_SCENE_TEMPLATES = [
        "Opening sequence",
        "Main content delivery",
        "Supporting information",
        "Engagement moment",
        "Conclusion",
    ]

    # Uploads newer than this many days are reported as "Recent".
    _RECENT_DAYS = 365

    def __init__(self):
        self.download_dir = tempfile.mkdtemp()

    def cleanup(self):
        """Clean up temporary directories and files (best effort, never raises)."""
        try:
            if hasattr(self, 'download_dir') and os.path.exists(self.download_dir):
                shutil.rmtree(self.download_dir)
                print(f"✅ Cleaned up temporary directory: {self.download_dir}")
        except Exception as e:
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")

    def is_valid_youtube_url(self, url):
        """Return True if *url* looks like a YouTube video URL.

        Surrounding whitespace is ignored; non-string input is rejected
        instead of raising.
        """
        if not isinstance(url, str):
            return False
        return self._YOUTUBE_URL_RE.match(url.strip()) is not None

    @staticmethod
    def _first_match(text, table, default):
        """Return the title-cased key of the first *table* entry mentioned in *text*."""
        for label, terms in table.items():
            if _mentions_any(text, terms):
                return label.title()
        return default

    @staticmethod
    def _metadata_text(video_info, include_tags=True):
        """Return lower-cased title + description (+ tags) as one searchable string."""
        parts = [_text_field(video_info, 'title'), _text_field(video_info, 'description')]
        if include_tags:
            parts.append(' '.join(_list_field(video_info, 'tags')))
        return ' '.join(parts).lower()

    def analyze_content_type(self, video_info):
        """Classify the video (e.g. "Educational", "Review") from title, description and tags.

        Returns "General" when no category keyword is found.
        """
        return self._first_match(
            self._metadata_text(video_info), self._CONTENT_INDICATORS, "General"
        )

    def analyze_emotion(self, video_info):
        """Guess the emotional tone from title and description; "Neutral" if nothing matches."""
        return self._first_match(
            self._metadata_text(video_info, include_tags=False),
            self._EMOTION_INDICATORS,
            "Neutral",
        )

    def analyze_music_style(self, video_info):
        """Guess the background music style from title, description and tags.

        Returns a title-cased style name, "Music Content" when the metadata
        only mentions music generically, or "Minimal/No Music" otherwise.
        """
        metadata = self._metadata_text(video_info)
        style = self._first_match(metadata, self._MUSIC_STYLES, None)
        if style is not None:
            return style
        # Any generic mention of music is reported as "Music Content"; there
        # is no separate "background music present" outcome because it could
        # only apply to metadata that already matched here.
        if _mentions_any(metadata, ['music', 'song', 'audio', 'beats']):
            return "Music Content"
        return "Minimal/No Music"

    def detect_influencers(self, video_info):
        """Report whether a known personality (or a generic influencer cue) is mentioned.

        Searches title, description, uploader, channel and tags. Returns a
        string starting with "TRUE" or "FALSE".
        """
        searchable_text = " ".join([
            _text_field(video_info, 'title'),
            _text_field(video_info, 'description'),
            _text_field(video_info, 'uploader'),
            _text_field(video_info, 'channel'),
            ' '.join(_list_field(video_info, 'tags')),
        ]).lower()

        detected_personalities = [
            personality
            for personality, aliases in self._KNOWN_PERSONALITIES.items()
            if _mentions_any(searchable_text, aliases)
        ]

        if detected_personalities:
            return f"TRUE - Detected: {', '.join(detected_personalities)}"
        if _mentions_any(searchable_text, self._INFLUENCER_INDICATORS):
            return "TRUE - Likely influencer/celebrity present (check video for confirmation)"
        return "FALSE - No known personalities detected"

    def generate_scene_breakdown(self, video_info):
        """Build a templated, timestamped scene list covering the whole video.

        The descriptions are generic templates chosen by content type, not
        the result of analysing the actual footage. Returns a list of
        markdown-formatted lines; a single explanatory line when the duration
        is unknown.
        """
        # yt-dlp may report the duration as a float or as None.
        duration = int(video_info.get('duration') or 0)
        if duration <= 0:
            return ["**[Duration Unknown]**: Unable to generate timestamped breakdown - video duration not available"]

        # Preferred segment length by video duration.
        if duration <= 30:
            segment_length = 2    # very short videos
        elif duration <= 60:
            segment_length = 5    # short videos
        elif duration <= 300:     # up to 5 minutes
            segment_length = 10
        elif duration <= 900:     # up to 15 minutes
            segment_length = 15
        else:
            segment_length = 30   # long videos

        video_type = self.analyze_content_type(video_info).lower()
        templates = self._SCENE_TEMPLATES.get(video_type, self._DEFAULT_SCENE_TEMPLATES)

        # At most two segments per template. When the preferred length would
        # need more, stretch the segments so the timeline still reaches the
        # end of the video instead of stopping part-way through.
        max_segments = len(templates) * 2
        segment_count = -(-duration // segment_length)  # ceiling division
        if segment_count > max_segments:
            segment_length = -(-duration // max_segments)
            segment_count = -(-duration // segment_length)

        title = _text_field(video_info, 'title').lower()
        if 'music' in title or 'song' in title:
            cue = " [Music/audio content]"
        elif 'tutorial' in title or 'how to' in title:
            cue = " [Instructional content with visual demonstrations]"
        else:
            cue = ""

        scenes = []
        for i in range(segment_count):
            is_last = i == segment_count - 1
            start_time = i * segment_length
            end_time = duration if is_last else start_time + segment_length - 1

            start_formatted = f"{start_time // 60}:{start_time % 60:02d}"
            end_formatted = f"{end_time // 60}:{end_time % 60:02d}"

            # Spread the templates evenly over the timeline.
            base_description = templates[i * len(templates) // segment_count]

            if i == 0:
                description = f"{base_description} - Video begins with title card/intro"
            elif is_last:
                description = f"{base_description} - Video concludes with end screen/outro"
            else:
                description = f"{base_description} - Continued content delivery"

            scenes.append(f"**[{start_formatted}-{end_formatted}]**: {description}{cue}")

        return scenes

    @staticmethod
    def _format_number(num):
        """Abbreviate a count as e.g. "1.5K", "2.3M", "1.0B"; None/0 become "0"."""
        if not num:
            return "0"
        if num >= 1_000_000_000:
            return f"{num / 1_000_000_000:.1f}B"
        if num >= 1_000_000:
            return f"{num / 1_000_000:.1f}M"
        if num >= 1_000:
            return f"{num / 1_000:.1f}K"
        return str(num)

    @classmethod
    def _is_recent(cls, upload_date):
        """Return True if *upload_date* (``YYYYMMDD``) is within ``_RECENT_DAYS`` of now."""
        try:
            uploaded = datetime.strptime(upload_date, '%Y%m%d')
        except ValueError:
            return False
        return (datetime.now() - uploaded).days <= cls._RECENT_DAYS

    def format_video_info(self, video_info):
        """Render the full analysis report for a yt-dlp info dict.

        Missing or ``None`` metadata fields are tolerated and shown as
        "Unknown", "0" or "None specified" as appropriate.
        """
        if not video_info:
            return "❌ No video information available."

        # Basic information
        duration = int(video_info.get('duration') or 0)
        if duration:
            duration_str = f"{duration // 3600}:{(duration % 3600) // 60:02d}:{duration % 60:02d}"
        else:
            duration_str = "Unknown"

        upload_date = _text_field(video_info, 'upload_date')
        if len(upload_date) == 8:
            formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"
        else:
            formatted_date = upload_date or "Unknown"

        # Heuristic analysis
        scene_descriptions = self.generate_scene_breakdown(video_info)
        music_style = self.analyze_music_style(video_info)
        influencer_detection = self.detect_influencers(video_info)
        video_type = self.analyze_content_type(video_info)
        emotion = self.analyze_emotion(video_info)

        # Additional metadata
        thumbnail_url = _text_field(video_info, 'thumbnail')
        language = _text_field(video_info, 'language') or 'Unknown'
        availability = _text_field(video_info, 'availability') or 'public'
        categories = _list_field(video_info, 'categories')
        tags = _list_field(video_info, 'tags')
        description = _text_field(video_info, 'description')

        # Engagement metrics (yt-dlp uses None for hidden/unknown counts)
        view_count = video_info.get('view_count') or 0
        like_count = video_info.get('like_count') or 0
        comment_count = video_info.get('comment_count') or 0
        # yt-dlp's field is 'channel_follower_count'; the old key is kept as
        # a fallback for dicts produced by other callers.
        subscriber_count = (
            video_info.get('channel_follower_count')
            or video_info.get('channel_followers')
            or 0
        )
        engagement_rate = (like_count / view_count) * 100 if view_count > 0 else 0

        # Quick insights
        if view_count > 100000:
            content_quality = 'High'
        elif view_count > 10000:
            content_quality = 'Medium'
        else:
            content_quality = 'Growing'

        if engagement_rate > 5:
            audience_engagement = 'High'
        elif engagement_rate > 1:
            audience_engagement = 'Medium'
        else:
            audience_engagement = 'Low'

        if view_count > 1000000 and engagement_rate > 3:
            viral_potential = 'High'
        elif view_count > 100000:
            viral_potential = 'Medium'
        else:
            viral_potential = 'Standard'

        freshness = 'Recent' if self._is_recent(upload_date) else 'Older Content'

        heavy_rule = '=' * 60
        light_rule = '─' * 30

        # The report is assembled line by line rather than as one f-string:
        # optional lines can be omitted cleanly, and no replacement field
        # needs a backslash (a syntax error before Python 3.12).
        lines = [
            "🎬 COMPREHENSIVE VIDEO ANALYSIS REPORT",
            heavy_rule,
            "",
            "📋 BASIC INFORMATION",
            light_rule,
            f"📹 **Title:** {_text_field(video_info, 'title') or 'Unknown'}",
            f"📺 **Channel:** {_text_field(video_info, 'channel') or 'Unknown'}",
            f"👤 **Uploader:** {_text_field(video_info, 'uploader') or 'Unknown'}",
            f"📅 **Upload Date:** {formatted_date}",
            f"⏱️ **Duration:** {duration_str}",
            f"🌐 **Language:** {language}",
            f"🔓 **Availability:** {availability.title()}",
            "",
            "📊 PERFORMANCE METRICS",
            light_rule,
            f"👀 **Views:** {self._format_number(view_count)}",
            f"👍 **Likes:** {self._format_number(like_count)}",
            f"💬 **Comments:** {self._format_number(comment_count)}",
            f"👥 **Channel Subscribers:** {self._format_number(subscriber_count)}",
            f"📈 **Engagement Rate:** {engagement_rate:.2f}%",
            "",
            "🏷️ CONTENT CLASSIFICATION",
            light_rule,
            f"📂 **Categories:** {', '.join(categories) if categories else 'None specified'}",
            f"🔖 **Primary Tags:** {', '.join(tags[:8]) if tags else 'None specified'}",
        ]
        if len(tags) > 8:
            ellipsis = '...' if len(tags) > 16 else ''
            lines.append(f"🔖 **Additional Tags:** {', '.join(tags[8:16])}{ellipsis}")

        lines += [
            "",
            "📝 VIDEO DESCRIPTION",
            light_rule,
            description[:800] if description else 'No description available',
        ]
        if len(description) > 800:
            lines += [
                "...",
                "[Description truncated - Full description available in original video]",
            ]

        lines += [
            "",
            "🎬 DETAILED SCENE-BY-SCENE BREAKDOWN",
            '─' * 40,
            "\n".join(scene_descriptions),
            "",
            f"🎵 **Background Music Style:** {music_style}",
            f"👤 **Influencer Present:** {influencer_detection}",
            f"🎥 **Video Type:** {video_type}",
            f"🎭 **Overall Emotion:** {emotion}",
            "",
            "📱 TECHNICAL DETAILS",
            light_rule,
            f"🔗 **Video URL:** {_text_field(video_info, 'webpage_url') or 'Unknown'}",
            f"🖼️ **Thumbnail:** {thumbnail_url if thumbnail_url else 'Not available'}",
            f"📱 **Video ID:** {_text_field(video_info, 'id') or 'Unknown'}",
            "",
            "⚡ QUICK INSIGHTS",
            light_rule,
            f"• **Content Quality:** {content_quality}",
            f"• **Audience Engagement:** {audience_engagement}",
            f"• **Viral Potential:** {viral_potential}",
            f"• **Content Freshness:** {freshness}",
            "",
            heavy_rule,
            f"📊 Analysis completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        ]
        return "\n".join(lines).strip()

    def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
        """Fetch metadata for *url* with yt-dlp without downloading media.

        Args:
            url: YouTube video URL; surrounding whitespace is ignored.
            progress: Gradio progress callback.
            cookiefile: Optional path to a cookies.txt file passed to yt-dlp.

        Returns:
            ``(info, message)`` - *info* is the yt-dlp info dict, or ``None``
            on failure, in which case *message* describes the problem.
        """
        if not url or not url.strip():
            return None, "❌ Please enter a YouTube URL"
        url = url.strip()

        if not self.is_valid_youtube_url(url):
            return None, "❌ Invalid YouTube URL format"

        try:
            progress(0.1, desc="Initializing YouTube extractor...")

            ydl_opts = {
                'noplaylist': True,
                'extract_flat': False,
                'writesubtitles': False,
                'writeautomaticsub': False,
                # Let extraction failures raise so the real reason reaches the
                # user; with True, yt-dlp swallows them and returns None.
                'ignoreerrors': False,
            }

            if cookiefile and os.path.exists(cookiefile):
                ydl_opts['cookiefile'] = cookiefile
                progress(0.3, desc="Loading cookies for authentication...")

            progress(0.5, desc="Extracting video metadata...")
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)

            if info is None:
                return None, "❌ No video information could be extracted for this URL"

            progress(0.9, desc="Processing video information...")
            progress(1.0, desc="✅ Analysis complete!")
            return info, "✅ Video information extracted successfully"

        except yt_dlp.utils.DownloadError as e:
            return None, f"❌ YouTube Download Error: {str(e)}"
        except Exception as e:
            return None, f"❌ Unexpected Error: {str(e)}"


# Initialize global downloader
downloader = YouTubeDownloader()


def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Gradio handler: analyse *url* (optionally with a cookies file) and return the report text.

    Never raises; every failure is returned as a "❌ ..." message.
    """
    try:
        progress(0.05, desc="Starting analysis...")

        cookiefile = None
        if cookies_file and os.path.exists(cookies_file):
            cookiefile = cookies_file
            progress(0.1, desc="Cookies file loaded successfully")

        info, msg = downloader.get_video_info(url, progress=progress, cookiefile=cookiefile)
        if not info:
            return f"❌ Analysis Failed: {msg}"

        progress(0.95, desc="Generating comprehensive report...")
        formatted_info = downloader.format_video_info(info)
        progress(1.0, desc="✅ Complete!")
        return formatted_info

    except Exception as e:
        return f"❌ System Error: {str(e)}"


def create_interface():
    """Create and configure the Gradio interface"""
    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="🎥 YouTube Video Analyzer Pro",
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .main-header {
            text-align: center;
            background: linear-gradient(90deg, #ff6b6b, #4ecdc4);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            font-size: 2.5em;
            font-weight: bold;
            margin-bottom: 20px;
        }
        .description-text {
            text-align: center;
            font-size: 1.1em;
            color: #666;
            margin-bottom: 30px;
        }
        """
    ) as interface:

        # The wrappers apply the .main-header / .description-text rules
        # declared in the css above.
        gr.HTML("""
        <div class="main-header">
            🎥 YouTube Video Analyzer Pro
        </div>
        <div class="description-text">
            Get comprehensive analysis of any YouTube video with detailed scene breakdowns, influencer detection, emotion analysis, and performance metrics. Upload cookies.txt to access age-restricted or private videos.
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                url_input = gr.Textbox(
                    label="🔗 YouTube URL",
                    placeholder="Paste your YouTube video URL here...",
                    lines=1
                )
            with gr.Column(scale=1):
                cookies_input = gr.File(
                    label="🍪 Upload cookies.txt (Optional)",
                    file_types=[".txt"],
                    type="filepath"
                )

        analyze_btn = gr.Button(
            "🔍 Analyze Video",
            variant="primary",
            size="lg"
        )

        output = gr.Textbox(
            label="📊 Comprehensive Analysis Report",
            lines=35,
            max_lines=50,
            show_copy_button=True
        )

        analyze_btn.click(
            fn=analyze_with_cookies,
            inputs=[url_input, cookies_input],
            outputs=output,
            show_progress=True
        )

        # Add examples
        gr.Examples(
            examples=[
                ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
                ["https://youtu.be/jNQXAC9IVRw"],
            ],
            inputs=url_input,
            label="🎯 Try these examples:"
        )

    return interface


if __name__ == "__main__":
    demo = create_interface()
    # Remove the downloader's temporary directory when the process exits.
    atexit.register(downloader.cleanup)
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )