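# Scraped page header from the hosting site's file view (kept as comments so the file parses):
#   Uploader avatar: Muktibhuyan's picture
#   Last commit: "Update app.py" (26fa230, verified)
#   Page links: raw | history | blame
#   File size: 21.7 kB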
import gradio as gr
import yt_dlp
import os
import tempfile
import shutil
from pathlib import Path
import re
import uuid
import json
from datetime import datetime
# Module-level dictionary; nothing in the visible part of this file reads or
# writes it. NOTE(review): presumably reserved for per-session state — confirm
# before removing.
session_data = {}
class YouTubeDownloader:
    """Fetch YouTube metadata through yt-dlp and render a text report.

    Every ``analyze_*`` / ``detect_*`` method is a keyword heuristic over the
    metadata text (title, description, tags, ...); nothing inspects the actual
    audio or video stream.  Metadata values that are missing *or* ``None`` are
    treated as empty, because extractors may report a field as ``None``.
    """

    # Same pattern as before, additionally accepting the m. and music. hosts.
    # It is anchored only at the start (``match``), so trailing query
    # parameters are allowed.
    _URL_PATTERN = re.compile(
        r'(https?://)?(www\.|m\.|music\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
        r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
    )

    # Keyword tables.  Dict order is match priority: the first category with a
    # hit wins.
    _CONTENT_KEYWORDS = {
        'educational': ['tutorial', 'how to', 'learn', 'guide', 'explained', 'lesson', 'course', 'tips'],
        'promotional': ['ad', 'promo', 'launch', 'brand', 'sponsored', 'commercial', 'product'],
        'entertainment': ['funny', 'comedy', 'challenge', 'reaction', 'prank', 'meme', 'fun'],
        'review': ['review', 'unboxing', 'comparison', 'vs', 'test', 'rating'],
        'vlog': ['vlog', 'daily', 'routine', 'day in', 'life', 'personal'],
        'music': ['music', 'song', 'cover', 'remix', 'beats', 'audio'],
        'news': ['news', 'breaking', 'update', 'report', 'latest', 'current'],
    }
    _EMOTION_KEYWORDS = {
        'energetic': ['excited', 'amazing', 'incredible', 'wow', 'awesome', 'fantastic', 'energy'],
        'positive': ['happy', 'love', 'great', 'good', 'wonderful', 'perfect', 'best'],
        'calm': ['calm', 'peaceful', 'relaxing', 'soothing', 'gentle', 'quiet'],
        'serious': ['important', 'serious', 'warning', 'critical', 'urgent', 'breaking'],
        'inspirational': ['inspire', 'motivate', 'change', 'transform', 'achieve', 'success'],
    }
    _MUSIC_STYLE_KEYWORDS = {
        'upbeat': ['upbeat', 'energetic', 'fast', 'dance', 'pop', 'electronic', 'rock'],
        'calm': ['calm', 'soft', 'soothing', 'ambient', 'peaceful', 'meditation', 'acoustic'],
        'cinematic': ['cinematic', 'dramatic', 'epic', 'orchestral', 'soundtrack'],
        'lo-fi': ['lo-fi', 'chill', 'study', 'relaxing beats'],
        'classical': ['classical', 'piano', 'orchestra', 'symphony'],
    }
    _MUSIC_HINTS = ('music', 'song', 'audio', 'beats')
    _KNOWN_PERSONALITIES = {
        # Indian Film Industry
        "Kartik Aaryan": ["kartik aaryan", "kartik", "aaryan"],
        "Deepika Padukone": ["deepika padukone", "deepika"],
        "Alia Bhatt": ["alia bhatt", "alia"],
        "Ranveer Singh": ["ranveer singh", "ranveer"],
        "Kiara Advani": ["kiara advani", "kiara"],
        "Janhvi Kapoor": ["janhvi kapoor", "janhvi"],
        "Ananya Panday": ["ananya panday", "ananya"],
        "Salman Khan": ["salman khan", "salman"],
        "Shahrukh Khan": ["shahrukh khan", "srk", "shah rukh"],
        "Amitabh Bachchan": ["amitabh bachchan", "amitabh", "big b"],
        "Katrina Kaif": ["katrina kaif", "katrina"],
        # Sports Personalities
        "Virat Kohli": ["virat kohli", "virat"],
        "MS Dhoni": ["ms dhoni", "dhoni"],
        "Rohit Sharma": ["rohit sharma", "rohit"],
        # International Celebrities
        "Taylor Swift": ["taylor swift", "taylor"],
        "Kylie Jenner": ["kylie jenner", "kylie"],
        "Elon Musk": ["elon musk", "elon"],
        # YouTubers/Content Creators
        "MrBeast": ["mrbeast", "mr beast"],
        "PewDiePie": ["pewdiepie", "felix"],
        "CarryMinati": ["carryminati", "carry", "ajey nagar"],
        "Ashish Chanchlani": ["ashish chanchlani", "ashish"],
        "Bhuvan Bam": ["bhuvan bam", "bb ki vines"],
        "Prajakta Koli": ["prajakta koli", "mostlysane"],
        # Tech Personalities
        "Sundar Pichai": ["sundar pichai", "sundar"],
        # Beauty/Fashion Influencers
        "James Charles": ["james charles"],
        "Nikkie Tutorials": ["nikkie tutorials", "nikkietutorials"],
    }
    _INFLUENCER_HINTS = (
        "influencer", "creator", "brand ambassador", "celebrity", "star",
        "featured", "guest", "interview", "collaboration", "collab",
    )
    _SCENE_TEMPLATES = {
        'educational': [
            "Introduction and topic overview",
            "Main content explanation with examples",
            "Detailed demonstration or walkthrough",
            "Key points summary and tips",
            "Conclusion and call-to-action",
        ],
        'promotional': [
            "Brand/product introduction",
            "Key features showcase",
            "Benefits and advantages highlight",
            "Social proof or testimonials",
            "Call-to-action and closing",
        ],
        'entertainment': [
            "Opening hook and introduction",
            "Main entertainment content",
            "Peak moment or climax",
            "Reaction or commentary",
            "Closing and engagement request",
        ],
        'review': [
            "Product/service introduction",
            "First impressions and unboxing",
            "Detailed feature analysis",
            "Pros and cons discussion",
            "Final verdict and recommendation",
        ],
        'vlog': [
            "Daily routine introduction",
            "Activity or event coverage",
            "Personal commentary and thoughts",
            "Interaction with others",
            "Day wrap-up and reflection",
        ],
    }
    _DEFAULT_SCENES = [
        "Opening sequence",
        "Main content delivery",
        "Supporting information",
        "Engagement moment",
        "Conclusion",
    ]

    def __init__(self):
        # Scratch directory owned by this instance; removed by cleanup().
        self.download_dir = tempfile.mkdtemp()

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _text(video_info, key):
        """Return ``video_info[key]`` as a string; missing or None becomes ''."""
        value = video_info.get(key)
        return "" if value is None else str(value)

    @staticmethod
    def _tag_list(video_info):
        """Return the video's tags as a list of strings (empty if absent/None)."""
        return [str(tag) for tag in (video_info.get('tags') or []) if tag]

    @staticmethod
    def _as_int(value):
        """Coerce a count/duration to int; None or unparsable values become 0."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return 0

    @staticmethod
    def _mentions(text, terms):
        """Return True if any of *terms* occurs in *text* as a whole word/phrase.

        A plain substring test flags 'ad' inside "download" or 'alia' inside
        "australia"; requiring non-word characters on both sides (with an
        optional plural "s") avoids those false positives.
        """
        return any(
            re.search(rf"(?<!\w){re.escape(term)}s?(?!\w)", text)
            for term in terms
        )

    @staticmethod
    def _format_number(num):
        """Abbreviate a count as e.g. '1.5K', '2.3M', '1.0B'; falsy becomes '0'."""
        if not num:
            return "0"
        for threshold, suffix in ((1_000_000_000, 'B'), (1_000_000, 'M'), (1_000, 'K')):
            if num >= threshold:
                return f"{num / threshold:.1f}{suffix}"
        return str(num)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def cleanup(self):
        """Clean up temporary directories and files"""
        try:
            if hasattr(self, 'download_dir') and os.path.exists(self.download_dir):
                shutil.rmtree(self.download_dir)
                print(f"✅ Cleaned up temporary directory: {self.download_dir}")
        except Exception as e:
            # Best effort: this runs at interpreter exit, so never raise.
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")

    def is_valid_youtube_url(self, url):
        """Return True if *url* looks like a YouTube video URL.

        Surrounding whitespace is ignored; non-string input is rejected
        instead of raising.
        """
        if not isinstance(url, str):
            return False
        return self._URL_PATTERN.match(url.strip()) is not None

    def analyze_content_type(self, video_info):
        """Analyze video content to determine type.

        Returns the title-cased name of the first matching category in
        ``_CONTENT_KEYWORDS`` or "General" when none matches.
        """
        metadata = " ".join([
            self._text(video_info, 'title'),
            self._text(video_info, 'description'),
            " ".join(self._tag_list(video_info)),
        ]).lower()
        for category, keywords in self._CONTENT_KEYWORDS.items():
            if self._mentions(metadata, keywords):
                return category.title()
        return "General"

    def analyze_emotion(self, video_info):
        """Analyze emotional tone of the video from its title and description.

        Returns the title-cased first matching emotion or "Neutral".
        """
        metadata = " ".join([
            self._text(video_info, 'title'),
            self._text(video_info, 'description'),
        ]).lower()
        for emotion, keywords in self._EMOTION_KEYWORDS.items():
            if self._mentions(metadata, keywords):
                return emotion.title()
        return "Neutral"

    def analyze_music_style(self, video_info):
        """Analyze background music style from title, description and tags.

        Returns a title-cased style name, "Music Content" when only generic
        music words are present, otherwise "Minimal/No Music".
        """
        metadata = " ".join([
            self._text(video_info, 'title'),
            self._text(video_info, 'description'),
            " ".join(self._tag_list(video_info)),
        ]).lower()
        for style, keywords in self._MUSIC_STYLE_KEYWORDS.items():
            if self._mentions(metadata, keywords):
                return style.title()
        # Check if it's likely a music video
        if self._mentions(metadata, self._MUSIC_HINTS):
            return "Music Content"
        # The former "Background Music Present" branch could never run: any
        # text containing 'music' had already returned "Music Content".
        return "Minimal/No Music"

    def detect_influencers(self, video_info):
        """Enhanced influencer detection.

        Searches title, description, uploader, channel and tags for known
        names/aliases, then for generic influencer vocabulary.  Returns a
        string starting with "TRUE" or "FALSE".
        """
        # Combine all searchable text
        searchable_text = " ".join([
            self._text(video_info, 'title'),
            self._text(video_info, 'description'),
            self._text(video_info, 'uploader'),
            self._text(video_info, 'channel'),
            " ".join(self._tag_list(video_info)),
        ]).lower()
        detected_personalities = [
            personality
            for personality, aliases in self._KNOWN_PERSONALITIES.items()
            if self._mentions(searchable_text, aliases)
        ]
        if detected_personalities:
            return f"TRUE - Detected: {', '.join(detected_personalities)}"
        if self._mentions(searchable_text, self._INFLUENCER_HINTS):
            return "TRUE - Likely influencer/celebrity present (check video for confirmation)"
        return "FALSE - No known personalities detected"

    def generate_scene_breakdown(self, video_info):
        """Generate enhanced scene-by-scene breakdown.

        Produces at most ``2 * len(templates)`` timestamped lines built from
        per-content-type templates; the text is generic and not derived from
        the actual footage.  Returns a list of Markdown-formatted strings.
        """
        # Durations may arrive as floats; range() and ':02d' need ints.
        duration = self._as_int(video_info.get('duration'))
        if duration <= 0:
            return ["**[Duration Unknown]**: Unable to generate timestamped breakdown - video duration not available"]
        title = self._text(video_info, 'title').lower()

        # Determine segment length based on video duration
        if duration <= 30:
            segment_length = 2   # 2-second segments for very short videos
        elif duration <= 60:
            segment_length = 5   # 5-second segments for short videos
        elif duration <= 300:    # 5 minutes
            segment_length = 10  # 10-second segments
        elif duration <= 900:    # 15 minutes
            segment_length = 15  # 15-second segments
        else:
            segment_length = 30  # 30-second segments for long videos

        # Scene templates based on video type
        video_type = self.analyze_content_type(video_info).lower()
        templates = self._SCENE_TEMPLATES.get(video_type, self._DEFAULT_SCENES)

        # The number of lines is capped.  Stretch the segments when the cap
        # would otherwise stop the breakdown partway through the video (the
        # "outro" line used to be stamped minutes into an hour-long video).
        max_segments = len(templates) * 2
        segment_length = max(segment_length, -(-duration // max_segments))  # ceil division
        segment_count = min(duration // segment_length + 1, max_segments)

        # Add visual and audio cues (loop-invariant, so computed once)
        if self._mentions(title, ('music', 'song')):
            cue = " [Music/audio content]"
        elif self._mentions(title, ('tutorial', 'how to')):
            cue = " [Instructional content with visual demonstrations]"
        else:
            cue = ""

        scenes = []
        for i in range(segment_count):
            is_last = i == segment_count - 1
            start_time = i * segment_length
            # The final segment always runs to the end of the video.
            end_time = duration if is_last else start_time + segment_length - 1
            # Format timestamps
            start_formatted = f"{start_time // 60}:{start_time % 60:02d}"
            end_formatted = f"{end_time // 60}:{end_time % 60:02d}"
            # Select appropriate template (the last one repeats once exhausted)
            base_description = templates[min(i, len(templates) - 1)]
            # Add contextual details
            if i == 0:
                scene_text = f"{base_description} - Video begins with title card/intro"
            elif is_last:
                scene_text = f"{base_description} - Video concludes with end screen/outro"
            else:
                scene_text = f"{base_description} - Continued content delivery"
            scenes.append(f"**[{start_formatted}-{end_formatted}]**: {scene_text}{cue}")
        return scenes

    def format_video_info(self, video_info):
        """Enhanced video information formatting.

        Builds the full plain-text report from a yt-dlp info dict.  Returns an
        error string when *video_info* is empty or None.
        """
        if not video_info:
            return "❌ No video information available."

        # Basic information processing
        duration = self._as_int(video_info.get('duration'))
        if duration > 0:
            duration_str = f"{duration // 3600}:{(duration % 3600) // 60:02d}:{duration % 60:02d}"
        else:
            duration_str = "Unknown"
        upload_date = self._text(video_info, 'upload_date')
        if len(upload_date) == 8:
            formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"
        else:
            formatted_date = upload_date or "Unknown"

        # Enhanced analysis
        scene_descriptions = self.generate_scene_breakdown(video_info)
        music_style = self.analyze_music_style(video_info)
        influencer_detection = self.detect_influencers(video_info)
        video_type = self.analyze_content_type(video_info)
        emotion = self.analyze_emotion(video_info)

        # Additional metadata
        thumbnail_url = video_info.get('thumbnail') or ''
        language = video_info.get('language') or 'Unknown'
        availability = str(video_info.get('availability') or 'public')

        # Categories and tags processing
        categories = [str(c) for c in (video_info.get('categories') or [])]
        tags = self._tag_list(video_info)
        additional_tags_line = ''
        if len(tags) > 8:
            additional_tags_line = (
                '🔖 **Additional Tags:** ' + ', '.join(tags[8:16])
                + ('...' if len(tags) > 16 else '')
            )

        # Description: only a missing/None value gets the placeholder.
        description = video_info.get('description')
        if description is None:
            description = 'No description available'
        description = str(description)
        # Built outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        truncation_note = (
            '...\n[Description truncated - Full description available in original video]'
            if len(description) > 800 else ''
        )

        # Engagement metrics (counts may be reported as None)
        view_count = self._as_int(video_info.get('view_count'))
        like_count = self._as_int(video_info.get('like_count'))
        comment_count = self._as_int(video_info.get('comment_count'))
        # yt-dlp reports subscribers as 'channel_follower_count'; the previous
        # key is still honoured as a fallback.
        subscriber_count = self._as_int(
            video_info.get('channel_follower_count')
            or video_info.get('channel_followers')
        )
        engagement_rate = (like_count / view_count) * 100 if view_count > 0 else 0

        # Quick insights
        if view_count > 100000:
            content_quality = 'High'
        elif view_count > 10000:
            content_quality = 'Medium'
        else:
            content_quality = 'Growing'
        if engagement_rate > 5:
            audience_engagement = 'High'
        elif engagement_rate > 1:
            audience_engagement = 'Medium'
        else:
            audience_engagement = 'Low'
        if view_count > 1000000 and engagement_rate > 3:
            viral_potential = 'High'
        elif view_count > 100000:
            viral_potential = 'Medium'
        else:
            viral_potential = 'Standard'
        # NOTE(review): fixed cut-off date; "Recent" will drift as time passes.
        freshness = 'Recent' if upload_date and upload_date >= '20240101' else 'Older Content'

        banner = '=' * 60
        rule = '─' * 30
        scene_rule = '─' * 40
        scene_block = "\n".join(scene_descriptions)
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        # Generate comprehensive report
        report = f"""
🎬 COMPREHENSIVE VIDEO ANALYSIS REPORT
{banner}
📋 BASIC INFORMATION
{rule}
📹 **Title:** {video_info.get('title') or 'Unknown'}
📺 **Channel:** {video_info.get('channel') or 'Unknown'}
👤 **Uploader:** {video_info.get('uploader') or 'Unknown'}
📅 **Upload Date:** {formatted_date}
⏱️ **Duration:** {duration_str}
🌐 **Language:** {language}
🔓 **Availability:** {availability.title()}
📊 PERFORMANCE METRICS
{rule}
👀 **Views:** {self._format_number(view_count)}
👍 **Likes:** {self._format_number(like_count)}
💬 **Comments:** {self._format_number(comment_count)}
👥 **Channel Subscribers:** {self._format_number(subscriber_count)}
📈 **Engagement Rate:** {engagement_rate:.2f}%
🏷️ CONTENT CLASSIFICATION
{rule}
📂 **Categories:** {', '.join(categories) if categories else 'None specified'}
🔖 **Primary Tags:** {', '.join(tags[:8]) if tags else 'None specified'}
{additional_tags_line}
📝 VIDEO DESCRIPTION
{rule}
{description[:800]}
{truncation_note}
🎬 DETAILED SCENE-BY-SCENE BREAKDOWN
{scene_rule}
{scene_block}
🎵 **Background Music Style:** {music_style}
👤 **Influencer Present:** {influencer_detection}
🎥 **Video Type:** {video_type}
🎭 **Overall Emotion:** {emotion}
📱 TECHNICAL DETAILS
{rule}
🔗 **Video URL:** {video_info.get('webpage_url') or 'Unknown'}
🖼️ **Thumbnail:** {thumbnail_url if thumbnail_url else 'Not available'}
📱 **Video ID:** {video_info.get('id') or 'Unknown'}
⚡ QUICK INSIGHTS
{rule}
• **Content Quality:** {content_quality}
• **Audience Engagement:** {audience_engagement}
• **Viral Potential:** {viral_potential}
• **Content Freshness:** {freshness}
{banner}
📊 Analysis completed at: {timestamp}
"""
        return report.strip()

    def get_video_info(self, url, progress=None, cookiefile=None):
        """Extract video information with enhanced error handling.

        Args:
            url: YouTube video URL; surrounding whitespace is ignored.
            progress: callable ``progress(fraction, desc=...)``.  When omitted
                a fresh ``gr.Progress()`` is created at call time (previously
                one shared instance was built when the class was defined).
            cookiefile: optional path to a cookies file handed to yt-dlp.

        Returns:
            ``(info, message)`` where *info* is the yt-dlp info dict, or None
            on any failure, and *message* is a user-facing status string.
        """
        if not url or not url.strip():
            return None, "❌ Please enter a YouTube URL"
        url = url.strip()
        if not self.is_valid_youtube_url(url):
            return None, "❌ Invalid YouTube URL format"
        try:
            if progress is None:
                progress = gr.Progress()
            progress(0.1, desc="Initializing YouTube extractor...")
            ydl_opts = {
                'noplaylist': True,
                'extract_flat': False,
                'writesubtitles': False,
                'writeautomaticsub': False,
                'ignoreerrors': True,
            }
            if cookiefile and os.path.exists(cookiefile):
                ydl_opts['cookiefile'] = cookiefile
                progress(0.3, desc="Loading cookies for authentication...")
            progress(0.5, desc="Extracting video metadata...")
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
            progress(0.9, desc="Processing video information...")
            if not info:
                # With 'ignoreerrors' set, extraction failures yield None
                # instead of raising, so this must not be reported as success.
                return None, "❌ No video information returned (the video may be unavailable, private or restricted)"
            progress(1.0, desc="✅ Analysis complete!")
            return info, "✅ Video information extracted successfully"
        except yt_dlp.DownloadError as e:
            return None, f"❌ YouTube Download Error: {str(e)}"
        except Exception as e:
            return None, f"❌ Unexpected Error: {str(e)}"
# Module-wide YouTubeDownloader instance used by analyze_with_cookies() and
# cleaned up at exit by the __main__ block. Constructing it creates a temporary
# directory (tempfile.mkdtemp) as an import-time side effect.
downloader = YouTubeDownloader()
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Main analysis function with progress tracking.

    Click handler for the "Analyze Video" button.

    Args:
        url: text from the URL box.
        cookies_file: path of the uploaded cookies file, or None.
        progress: progress reporter, called as ``progress(fraction, desc=...)``.
            The ``gr.Progress()`` default is kept in the signature, following
            Gradio's progress-tracking idiom for event handlers.

    Returns:
        The formatted report on success, otherwise a message starting with
        "❌".  Exceptions are converted to a message so the UI always gets text.
    """
    try:
        progress(0.05, desc="Starting analysis...")

        # Only forward the cookies path when the file actually exists.
        cookiefile = None
        if cookies_file and os.path.exists(cookies_file):
            cookiefile = cookies_file
            progress(0.1, desc="Cookies file loaded successfully")

        info, msg = downloader.get_video_info(url, progress=progress, cookiefile=cookiefile)
        if not info:
            return f"❌ Analysis Failed: {msg}"

        progress(0.95, desc="Generating comprehensive report...")
        formatted_info = downloader.format_video_info(info)
        # Label repaired: the check-mark emoji was mis-encoded.
        progress(1.0, desc="✅ Complete!")
        return formatted_info
    except Exception as e:
        return f"❌ System Error: {str(e)}"
def create_interface():
    """Create and configure the Gradio interface.

    Builds a Blocks layout with a URL textbox, an optional cookies.txt upload,
    an analyze button wired to ``analyze_with_cookies``, a report textbox and
    two example URLs.  Returns the ``gr.Blocks`` object without launching it.

    The emoji in the title, header and labels were mis-encoded and have been
    restored.
    """
    # NOTE(review): nesting was reconstructed from a source whose indentation
    # had been flattened — confirm the button, output box and examples belong
    # below the input row rather than inside it.
    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="🎥 YouTube Video Analyzer Pro",
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .main-header {
            text-align: center;
            background: linear-gradient(90deg, #ff6b6b, #4ecdc4);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            font-size: 2.5em;
            font-weight: bold;
            margin-bottom: 20px;
        }
        .description-text {
            text-align: center;
            font-size: 1.1em;
            color: #666;
            margin-bottom: 30px;
        }
        """
    ) as interface:
        gr.HTML("""
        <div class="main-header">
        🎥 YouTube Video Analyzer Pro
        </div>
        <div class="description-text">
        Get comprehensive analysis of any YouTube video with detailed scene breakdowns,
        influencer detection, emotion analysis, and performance metrics.
        Upload cookies.txt to access age-restricted or private videos.
        </div>
        """)

        # Inputs: the URL takes two thirds of the row, the cookie upload one third.
        with gr.Row():
            with gr.Column(scale=2):
                url_input = gr.Textbox(
                    label="🔗 YouTube URL",
                    placeholder="Paste your YouTube video URL here...",
                    lines=1
                )
            with gr.Column(scale=1):
                cookies_input = gr.File(
                    label="🍪 Upload cookies.txt (Optional)",
                    file_types=[".txt"],
                    type="filepath"
                )

        analyze_btn = gr.Button(
            "🔍 Analyze Video",
            variant="primary",
            size="lg"
        )
        output = gr.Textbox(
            label="📊 Comprehensive Analysis Report",
            lines=35,
            max_lines=50,
            show_copy_button=True
        )

        # Input order must match analyze_with_cookies(url, cookies_file).
        analyze_btn.click(
            fn=analyze_with_cookies,
            inputs=[url_input, cookies_input],
            outputs=output,
            show_progress=True
        )

        # Add examples
        gr.Examples(
            examples=[
                ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
                ["https://youtu.be/jNQXAC9IVRw"],
            ],
            inputs=url_input,
            label="🎯 Try these examples:"
        )
    return interface
if __name__ == "__main__":
    # Script entry point: build the UI, arrange for the downloader's temporary
    # directory to be removed at interpreter exit, then start the server.
    import atexit

    demo = create_interface()
    atexit.register(downloader.cleanup)

    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)