# YouTube Video Analyzer + Downloader — Gradio app (Hugging Face Space)
import os
from datetime import datetime

import google.generativeai as genai
import gradio as gr
import webvtt
import yt_dlp
genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) # Set your Gemini API key in environment | |
MODEL_ID = "gemini-1.5-flash-latest" | |
model = genai.GenerativeModel(model_name=MODEL_ID) | |
class VideoAnalyzer:
    """Analyze YouTube videos: metadata, subtitle-driven scene descriptions
    (via Gemini), engagement metrics, and optional media downloads."""

    def __init__(self):
        # Scratch directory where yt-dlp writes .vtt subtitle files.
        self.temp_dir = "temp_subs"
        os.makedirs(self.temp_dir, exist_ok=True)
        # Downloaded media lands under ~/Downloads/YT_Reports.
        self.downloads_dir = os.path.join(os.path.expanduser("~"), "Downloads", "YT_Reports")
        os.makedirs(self.downloads_dir, exist_ok=True)

    def is_valid_youtube_url(self, url):
        """Loose sanity check that *url* belongs to YouTube."""
        return "youtube.com" in url or "youtu.be" in url

    def download_info_and_subs(self, url, cookiefile=None):
        """Fetch metadata and write subtitle (.vtt) files for *url*.

        Returns a pair ``(info_or_none, info)``: the first element is None
        when the video exposes neither manual nor automatic captions.
        """
        # Remove stale subtitle files so analyze() cannot pick up a .vtt
        # left over from a previous video.
        for old in os.listdir(self.temp_dir):
            if old.endswith(".vtt"):
                os.remove(os.path.join(self.temp_dir, old))
        ydl_opts = {
            'writesubtitles': True,
            'writeautomaticsub': True,
            'subtitleslangs': ['en', 'en-HI', 'hi'],
            'skip_download': True,  # subtitles only; never fetch the media
            'outtmpl': os.path.join(self.temp_dir, 'video'),
        }
        if cookiefile:
            ydl_opts['cookiefile'] = cookiefile
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # Bug fix: download must be True for yt-dlp to actually write the
            # subtitle files ('skip_download' above still suppresses the video
            # stream). With download=False no .vtt file is ever created, so
            # analyze() could only ever find stale files.
            info = ydl.extract_info(url, download=True)
        subs = info.get('automatic_captions') or info.get('subtitles')
        return (info if subs else None), info

    def extract_captions(self, vtt_path):
        """Parse a .vtt file into '[start - end]: text' strings."""
        captions = []
        for caption in webvtt.read(vtt_path):
            text = caption.text.strip().replace("\n", " ")
            captions.append(f"[{caption.start} - {caption.end}]: {text}")
        return captions

    def generate_scene_descriptions(self, captions):
        """Ask Gemini to turn subtitle lines into timestamped scene prose."""
        prompt = """You are a visual scene narrator. Turn the following subtitles into timestamped scene descriptions.
Use natural language and visual imagination. Avoid brand names.
Example output:
* **[0:00-0:01]:** A young woman with dark hair, wearing a red shirt, sits at a desk in a modern office...
Now convert these subtitles:
"""
        prompt += "\n".join(captions[:20])  # Limit to first 20 captions
        response = model.generate_content(prompt)
        return response.text

    def detect_influencer_status(self, info):
        """One-line Gemini classification of the channel's status."""
        prompt = f"""You are a media analyst. Based on this metadata, tell if this channel is a famous influencer, brand, or regular user.
Channel: {info.get('channel', '')}
Uploader: {info.get('uploader', '')}
Subscribers: {info.get('channel_followers', 0)}
Title: {info.get('title', '')}
Description: {info.get('description', '')}
Respond with 1 line like 'Famous Influencer', 'Verified Brand Channel', or 'Regular Content Creator'."""
        result = model.generate_content(prompt)
        return result.text.strip()

    def format_number(self, num):
        """Humanize a count: 1500 -> '1.5K', 2500000 -> '2.5M'; None/0 -> '0'."""
        if not num:
            return "0"
        if num >= 1_000_000_000:
            return f"{num / 1_000_000_000:.1f}B"
        elif num >= 1_000_000:
            return f"{num / 1_000_000:.1f}M"
        elif num >= 1_000:
            return f"{num / 1_000:.1f}K"
        return str(num)

    def analyze(self, url, cookiefile=None):
        """Build the full text analysis report for *url*.

        Returns the report string, or an error string on invalid URL /
        missing subtitles.
        """
        if not self.is_valid_youtube_url(url):
            return "β Invalid YouTube URL."
        info, raw_info = self.download_info_and_subs(url, cookiefile)
        if not info:
            return "β Subtitles not found for this video."
        vtt_path = None
        for file in os.listdir(self.temp_dir):
            if file.endswith(".vtt"):
                vtt_path = os.path.join(self.temp_dir, file)
                break
        if not vtt_path:
            return "β Subtitle file not found."
        captions = self.extract_captions(vtt_path)
        scene_block = self.generate_scene_descriptions(captions)
        # yt-dlp may report duration as a float (and counts as None); coerce
        # so the :02d / :, format specs below cannot crash.
        duration = int(raw_info.get('duration') or 0)
        duration_str = f"{duration//3600}:{(duration%3600)//60:02d}:{duration%60:02d}" if duration else "Unknown"
        upload_date = raw_info.get('upload_date', '') or ''
        formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}" if len(upload_date) == 8 else "Unknown"
        view_count = raw_info.get('view_count') or 0
        like_count = raw_info.get('like_count') or 0
        dislike_count = raw_info.get('dislike_count') or 0
        comment_count = raw_info.get('comment_count') or 0
        subscriber_count = raw_info.get('channel_followers') or 0
        engagement_rate = (like_count / view_count) * 100 if view_count else 0
        like_ratio = (like_count / (like_count + dislike_count)) * 100 if (like_count + dislike_count) else 0
        comment_ratio = (comment_count / view_count) * 100 if view_count else 0
        influencer_status = self.detect_influencer_status(raw_info)
        summary = f"""
π BASIC INFORMATION
{'β'*30}
πΉ **Title:** {raw_info.get('title', 'Unknown')}
πΊ **Channel:** {raw_info.get('channel', 'Unknown')}
π€ **Uploader:** {raw_info.get('uploader', 'Unknown')}
π **Upload Date:** {formatted_date}
β±οΈ **Duration:** {duration_str}
π **Video ID:** {raw_info.get('id', 'Unknown')}
π **Video URL:** {raw_info.get('webpage_url', 'Unknown')}
π PERFORMANCE METRICS
{'β'*30}
π **Views:** {self.format_number(view_count)} ({view_count:,} exact)
π **Likes:** {self.format_number(like_count)} ({like_count:,} exact)
π **Dislikes:** {self.format_number(dislike_count)} ({dislike_count:,} exact)
π¬ **Comments:** {self.format_number(comment_count)} ({comment_count:,} exact)
π₯ **Subscribers:** {self.format_number(subscriber_count)} ({subscriber_count:,} exact)
π **Engagement Rate:** {engagement_rate:.2f}% (likes/views)
β€οΈ **Like Ratio:** {like_ratio:.1f}% (likes vs total reactions)
π **Comment Ratio:** {comment_ratio:.3f}% (comments/views)
π INFLUENCER STATUS
{'β'*30}
{influencer_status}
π¬ SCENE-BY-SCENE BREAKDOWN
{'β'*30}
{scene_block}
""".strip()
        return summary

    def download_video(self, url, quality="best", audio_only=False, cookiefile=None):
        """Download *url* into the reports folder; returns the local file path.

        Bug fix: *quality* was previously accepted but ignored — it is now
        mapped to a yt-dlp format selector (height cap for 720p/480p); the
        default 'best' behaves exactly as before. audio_only wins over quality.
        """
        quality_formats = {'720p': 'best[height<=720]', '480p': 'best[height<=480]'}
        ydl_opts = {
            'outtmpl': os.path.join(self.downloads_dir, '%(title)s.%(ext)s'),
            'format': 'bestaudio/best' if audio_only else quality_formats.get(quality, 'best'),
            'noplaylist': True,
        }
        if cookiefile:
            ydl_opts['cookiefile'] = cookiefile
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            filename = ydl.prepare_filename(info)
        return filename
analyzer = VideoAnalyzer() | |
def analyze_video(url, cookies_file):
    """Gradio handler: run the full analysis pipeline for *url*.

    *cookies_file* is an optional uploaded file object; its on-disk path
    is forwarded to yt-dlp when present.
    """
    cookie_path = None
    if cookies_file:
        cookie_path = cookies_file.name
    return analyzer.analyze(url, cookie_path)
def download_video(url, quality, audio_only, cookies_file):
    """Gradio handler: download *url* and return the saved file path.

    *cookies_file* is an optional uploaded file object; its on-disk path
    is forwarded to yt-dlp when present.
    """
    cookie_path = None
    if cookies_file:
        cookie_path = cookies_file.name
    return analyzer.download_video(url, quality, audio_only, cookie_path)
# ---- Gradio UI wiring ----
with gr.Blocks(title="YouTube Analyzer + Downloader") as iface:
    gr.Markdown("# π¬ YouTube Video Analyzer + Downloader")
    url = gr.Textbox(label="YouTube URL")
    # NOTE(review): type="file" returns a file object (handlers read .name);
    # this is the Gradio 3.x API — confirm installed Gradio version supports it.
    cookies = gr.File(label="Upload cookies.txt (Optional)", file_types=[".txt"], type="file")
    with gr.Tab("π Analyze Video"):
        analyze_btn = gr.Button("Analyze")
        analysis_output = gr.Textbox(label="Analysis Report", lines=40, show_copy_button=True)
        analyze_btn.click(fn=analyze_video, inputs=[url, cookies], outputs=analysis_output)
    with gr.Tab("β¬οΈ Download Video"):
        quality = gr.Dropdown(["best", "720p", "480p"], label="Quality", value="best")
        audio_only = gr.Checkbox(label="Audio Only", value=False)
        download_btn = gr.Button("Download")
        download_output = gr.Textbox(label="Download Status")
        # Wraps the module-level download_video handler to report the path.
        def handle_download(url, quality, audio_only, cookies_file):
            path = download_video(url, quality, audio_only, cookies_file)
            return f"β Downloaded to: {path}"
        download_btn.click(fn=handle_download, inputs=[url, quality, audio_only, cookies], outputs=download_output)

if __name__ == "__main__":
    iface.launch(debug=True)