Spaces:
Sleeping
Sleeping
File size: 7,914 Bytes
39fa25b e1c4426 39fa25b e1c4426 39fa25b e1c4426 39fa25b e1c4426 39fa25b e1c4426 39fa25b e1c4426 39fa25b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
import os
import gradio as gr
import yt_dlp
import webvtt
import google.generativeai as genai
from datetime import datetime
# Configure the Gemini client once at import time. A missing/invalid
# GOOGLE_API_KEY does not fail here — it surfaces later, on the first
# generate_content() call.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) # Set your Gemini API key in environment
# Model used for scene narration and influencer classification below.
MODEL_ID = "gemini-1.5-flash-latest"
model = genai.GenerativeModel(model_name=MODEL_ID)
class VideoAnalyzer:
    """Fetch YouTube metadata and subtitles with yt-dlp and build a formatted
    text report, using the Gemini model for scene descriptions and
    influencer classification."""

    def __init__(self):
        # Scratch directory where yt-dlp writes .vtt subtitle files.
        self.temp_dir = "temp_subs"
        os.makedirs(self.temp_dir, exist_ok=True)
        # Downloaded media is saved under ~/Downloads/YT_Reports.
        self.downloads_dir = os.path.join(os.path.expanduser("~"), "Downloads", "YT_Reports")
        os.makedirs(self.downloads_dir, exist_ok=True)

    def is_valid_youtube_url(self, url):
        """Cheap substring sanity check — not a full URL validation."""
        return "youtube.com" in url or "youtu.be" in url

    def _clear_stale_subs(self):
        """Delete leftover .vtt files so analyze() never picks up a previous
        video's subtitles (it grabs the first .vtt it finds in temp_dir)."""
        for name in os.listdir(self.temp_dir):
            if name.endswith(".vtt"):
                try:
                    os.remove(os.path.join(self.temp_dir, name))
                except OSError:
                    pass  # best effort; worst case is a stale report

    def download_info_and_subs(self, url, cookiefile=None):
        """Extract metadata and write subtitle files into temp_dir.

        Returns (info_or_None, info): the first element is None when the
        video exposes neither automatic captions nor manual subtitles.
        """
        ydl_opts = {
            'writesubtitles': True,
            'writeautomaticsub': True,
            'subtitleslangs': ['en', 'en-HI', 'hi'],
            'skip_download': True,  # never fetch the media itself here
            'outtmpl': os.path.join(self.temp_dir, 'video'),
        }
        if cookiefile:
            ydl_opts['cookiefile'] = cookiefile
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # BUG FIX: download must be True for yt-dlp to actually write the
            # .vtt files ('skip_download' above still suppresses the media
            # download). With download=False nothing was ever written and
            # analyze() always hit "Subtitle file not found".
            info = ydl.extract_info(url, download=True)
            subs = info.get('automatic_captions') or info.get('subtitles')
            return info if subs else None, info

    def extract_captions(self, vtt_path):
        """Parse a .vtt file into '[start - end]: text' strings."""
        captions = []
        for caption in webvtt.read(vtt_path):
            text = caption.text.strip().replace("\n", " ")
            captions.append(f"[{caption.start} - {caption.end}]: {text}")
        return captions

    def generate_scene_descriptions(self, captions):
        """Ask Gemini to turn timestamped captions into scene descriptions."""
        prompt = """You are a visual scene narrator. Turn the following subtitles into timestamped scene descriptions.
Use natural language and visual imagination. Avoid brand names.
Example output:
* **[0:00-0:01]:** A young woman with dark hair, wearing a red shirt, sits at a desk in a modern office...
Now convert these subtitles:
"""
        prompt += "\n".join(captions[:20])  # Limit to first 20 captions
        response = model.generate_content(prompt)
        return response.text

    def detect_influencer_status(self, info):
        """Ask Gemini to classify the channel from its metadata (one line)."""
        prompt = f"""You are a media analyst. Based on this metadata, tell if this channel is a famous influencer, brand, or regular user.
Channel: {info.get('channel', '')}
Uploader: {info.get('uploader', '')}
Subscribers: {info.get('channel_followers', 0)}
Title: {info.get('title', '')}
Description: {info.get('description', '')}
Respond with 1 line like 'Famous Influencer', 'Verified Brand Channel', or 'Regular Content Creator'."""
        result = model.generate_content(prompt)
        return result.text.strip()

    def format_number(self, num):
        """Humanize a count: 1500 -> '1.5K', 2_500_000 -> '2.5M'; None/0 -> '0'."""
        if not num: return "0"
        if num >= 1_000_000_000: return f"{num / 1_000_000_000:.1f}B"
        elif num >= 1_000_000: return f"{num / 1_000_000:.1f}M"
        elif num >= 1_000: return f"{num / 1_000:.1f}K"
        return str(num)

    def analyze(self, url, cookiefile=None):
        """Build the full text report for a YouTube URL, or an error string."""
        if not self.is_valid_youtube_url(url):
            return "β Invalid YouTube URL."
        # BUG FIX: remove .vtt files from earlier runs before downloading, so
        # the scan below cannot pick a previous video's subtitles.
        self._clear_stale_subs()
        info, raw_info = self.download_info_and_subs(url, cookiefile)
        if not info:
            return "β Subtitles not found for this video."
        vtt_path = None
        for file in os.listdir(self.temp_dir):
            if file.endswith(".vtt"):
                vtt_path = os.path.join(self.temp_dir, file)
                break
        if not vtt_path:
            return "β Subtitle file not found."
        captions = self.extract_captions(vtt_path)
        scene_block = self.generate_scene_descriptions(captions)
        # BUG FIX: yt-dlp may return keys that are present but None;
        # .get(key, 0) does not guard that, and None crashed the {value:,}
        # formatting below. Coerce with `or 0` / `or ''` instead.
        duration = int(raw_info.get('duration') or 0)
        duration_str = f"{duration//3600}:{(duration%3600)//60:02d}:{duration%60:02d}" if duration else "Unknown"
        upload_date = raw_info.get('upload_date') or ''
        formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}" if len(upload_date) == 8 else "Unknown"
        view_count = raw_info.get('view_count') or 0
        like_count = raw_info.get('like_count') or 0
        dislike_count = raw_info.get('dislike_count') or 0
        comment_count = raw_info.get('comment_count') or 0
        subscriber_count = raw_info.get('channel_followers') or 0
        engagement_rate = (like_count / view_count) * 100 if view_count else 0
        like_ratio = (like_count / (like_count + dislike_count)) * 100 if (like_count + dislike_count) else 0
        comment_ratio = (comment_count / view_count) * 100 if view_count else 0
        influencer_status = self.detect_influencer_status(raw_info)
        summary = f"""
π BASIC INFORMATION
{'β'*30}
πΉ **Title:** {raw_info.get('title', 'Unknown')}
πΊ **Channel:** {raw_info.get('channel', 'Unknown')}
π€ **Uploader:** {raw_info.get('uploader', 'Unknown')}
π
**Upload Date:** {formatted_date}
β±οΈ **Duration:** {duration_str}
π **Video ID:** {raw_info.get('id', 'Unknown')}
π **Video URL:** {raw_info.get('webpage_url', 'Unknown')}
π PERFORMANCE METRICS
{'β'*30}
π **Views:** {self.format_number(view_count)} ({view_count:,} exact)
π **Likes:** {self.format_number(like_count)} ({like_count:,} exact)
π **Dislikes:** {self.format_number(dislike_count)} ({dislike_count:,} exact)
π¬ **Comments:** {self.format_number(comment_count)} ({comment_count:,} exact)
π₯ **Subscribers:** {self.format_number(subscriber_count)} ({subscriber_count:,} exact)
π **Engagement Rate:** {engagement_rate:.2f}% (likes/views)
β€οΈ **Like Ratio:** {like_ratio:.1f}% (likes vs total reactions)
π **Comment Ratio:** {comment_ratio:.3f}% (comments/views)
π INFLUENCER STATUS
{'β'*30}
{influencer_status}
π¬ SCENE-BY-SCENE BREAKDOWN
{'β'*30}
{scene_block}
""".strip()
        return summary

    def download_video(self, url, quality="best", audio_only=False, cookiefile=None):
        """Download the video (or audio only) into downloads_dir; return its path.

        quality: 'best', '720p' or '480p' — the UI dropdown values.
        """
        # BUG FIX: `quality` was accepted but ignored (format was always
        # 'best'). Map the dropdown choices to yt-dlp height-capped selectors.
        quality_formats = {
            'best': 'best',
            '720p': 'best[height<=720]',
            '480p': 'best[height<=480]',
        }
        ydl_opts = {
            'outtmpl': os.path.join(self.downloads_dir, '%(title)s.%(ext)s'),
            'format': 'bestaudio/best' if audio_only else quality_formats.get(quality, 'best'),
            'noplaylist': True
        }
        if cookiefile:
            ydl_opts['cookiefile'] = cookiefile
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            filename = ydl.prepare_filename(info)
            return filename
# Single shared analyzer instance used by the Gradio callbacks below.
analyzer = VideoAnalyzer()
def analyze_video(url, cookies_file):
    """Gradio callback: run the full analysis for `url` and return the report.

    cookies_file comes from a gr.File input; depending on the gradio version
    it is either a tempfile-like object (with .name) or a plain path string.
    Accept both instead of assuming `.name` exists.
    """
    cookiefile = getattr(cookies_file, "name", cookies_file) if cookies_file else None
    return analyzer.analyze(url, cookiefile)
def download_video(url, quality, audio_only, cookies_file):
    """Gradio callback: download `url` and return the saved file path.

    cookies_file comes from a gr.File input; depending on the gradio version
    it is either a tempfile-like object (with .name) or a plain path string.
    Accept both instead of assuming `.name` exists.
    """
    cookiefile = getattr(cookies_file, "name", cookies_file) if cookies_file else None
    return analyzer.download_video(url, quality, audio_only, cookiefile)
# Gradio UI: one shared URL/cookies input, plus an Analyze tab and a
# Download tab wired to the callbacks above.
with gr.Blocks(title="YouTube Analyzer + Downloader") as iface:
    gr.Markdown("# π¬ YouTube Video Analyzer + Downloader")
    url = gr.Textbox(label="YouTube URL")
    # NOTE(review): type="file" is removed in gradio 4.x ("filepath" replaces
    # it); kept for compatibility with the gradio version this was written for.
    cookies = gr.File(label="Upload cookies.txt (Optional)", file_types=[".txt"], type="file")
    with gr.Tab("π Analyze Video"):
        analyze_btn = gr.Button("Analyze")
        analysis_output = gr.Textbox(label="Analysis Report", lines=40, show_copy_button=True)
        analyze_btn.click(fn=analyze_video, inputs=[url, cookies], outputs=analysis_output)
    with gr.Tab("β¬οΈ Download Video"):
        quality = gr.Dropdown(["best", "720p", "480p"], label="Quality", value="best")
        audio_only = gr.Checkbox(label="Audio Only", value=False)
        download_btn = gr.Button("Download")
        download_output = gr.Textbox(label="Download Status")

        def handle_download(url, quality, audio_only, cookies_file):
            """Run the download and report the saved path (or the failure)."""
            # Surface failures in the status textbox instead of an unhandled
            # exception in the gradio worker.
            try:
                path = download_video(url, quality, audio_only, cookies_file)
            except Exception as e:
                return f"Download failed: {e}"
            # BUG FIX: the original return statement's f-string literal was
            # split across two lines (unterminated string -> SyntaxError);
            # rejoined into a single message.
            return f"Downloaded to: {path}"

        download_btn.click(fn=handle_download, inputs=[url, quality, audio_only, cookies], outputs=download_output)
if __name__ == "__main__":
    # BUG FIX: removed a stray trailing "|" (copy/paste residue) that made
    # this line a syntax error. debug=True gives verbose errors in the UI.
    iface.launch(debug=True)