Spaces:
Sleeping
Sleeping
File size: 7,914 Bytes
39fa25b e1c4426 39fa25b e1c4426 39fa25b e1c4426 39fa25b e1c4426 39fa25b e1c4426 39fa25b e1c4426 39fa25b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
import os
import gradio as gr
import yt_dlp
import webvtt
import google.generativeai as genai
from datetime import datetime
# Configure the Gemini client once at import time. A missing/invalid
# GOOGLE_API_KEY does not fail here — it surfaces later, on the first
# generate_content() call.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) # Set your Gemini API key in environment
# Model used for scene narration and influencer classification below.
MODEL_ID = "gemini-1.5-flash-latest"
model = genai.GenerativeModel(model_name=MODEL_ID)
class VideoAnalyzer:
    """Fetch YouTube metadata and subtitles with yt-dlp and build a formatted
    text report, using the Gemini model for scene descriptions and
    influencer classification."""

    def __init__(self):
        # Scratch directory where yt-dlp writes .vtt subtitle files.
        self.temp_dir = "temp_subs"
        os.makedirs(self.temp_dir, exist_ok=True)
        # Downloaded media is saved under ~/Downloads/YT_Reports.
        self.downloads_dir = os.path.join(os.path.expanduser("~"), "Downloads", "YT_Reports")
        os.makedirs(self.downloads_dir, exist_ok=True)

    def is_valid_youtube_url(self, url):
        """Cheap substring sanity check — not a full URL validation."""
        return "youtube.com" in url or "youtu.be" in url

    def _clear_stale_subs(self):
        """Delete leftover .vtt files so analyze() never picks up a previous
        video's subtitles (it grabs the first .vtt it finds in temp_dir)."""
        for name in os.listdir(self.temp_dir):
            if name.endswith(".vtt"):
                try:
                    os.remove(os.path.join(self.temp_dir, name))
                except OSError:
                    pass  # best effort; worst case is a stale report

    def download_info_and_subs(self, url, cookiefile=None):
        """Extract metadata and write subtitle files into temp_dir.

        Returns (info_or_None, info): the first element is None when the
        video exposes neither automatic captions nor manual subtitles.
        """
        ydl_opts = {
            'writesubtitles': True,
            'writeautomaticsub': True,
            'subtitleslangs': ['en', 'en-HI', 'hi'],
            'skip_download': True,  # never fetch the media itself here
            'outtmpl': os.path.join(self.temp_dir, 'video'),
        }
        if cookiefile:
            ydl_opts['cookiefile'] = cookiefile
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # BUG FIX: download must be True for yt-dlp to actually write the
            # .vtt files ('skip_download' above still suppresses the media
            # download). With download=False nothing was ever written and
            # analyze() always hit "Subtitle file not found".
            info = ydl.extract_info(url, download=True)
            subs = info.get('automatic_captions') or info.get('subtitles')
            return info if subs else None, info

    def extract_captions(self, vtt_path):
        """Parse a .vtt file into '[start - end]: text' strings."""
        captions = []
        for caption in webvtt.read(vtt_path):
            text = caption.text.strip().replace("\n", " ")
            captions.append(f"[{caption.start} - {caption.end}]: {text}")
        return captions

    def generate_scene_descriptions(self, captions):
        """Ask Gemini to turn timestamped captions into scene descriptions."""
        prompt = """You are a visual scene narrator. Turn the following subtitles into timestamped scene descriptions.
Use natural language and visual imagination. Avoid brand names.
Example output:
* **[0:00-0:01]:** A young woman with dark hair, wearing a red shirt, sits at a desk in a modern office...
Now convert these subtitles:
"""
        prompt += "\n".join(captions[:20])  # Limit to first 20 captions
        response = model.generate_content(prompt)
        return response.text

    def detect_influencer_status(self, info):
        """Ask Gemini to classify the channel from its metadata (one line)."""
        prompt = f"""You are a media analyst. Based on this metadata, tell if this channel is a famous influencer, brand, or regular user.
Channel: {info.get('channel', '')}
Uploader: {info.get('uploader', '')}
Subscribers: {info.get('channel_followers', 0)}
Title: {info.get('title', '')}
Description: {info.get('description', '')}
Respond with 1 line like 'Famous Influencer', 'Verified Brand Channel', or 'Regular Content Creator'."""
        result = model.generate_content(prompt)
        return result.text.strip()

    def format_number(self, num):
        """Humanize a count: 1500 -> '1.5K', 2_500_000 -> '2.5M'; None/0 -> '0'."""
        if not num: return "0"
        if num >= 1_000_000_000: return f"{num / 1_000_000_000:.1f}B"
        elif num >= 1_000_000: return f"{num / 1_000_000:.1f}M"
        elif num >= 1_000: return f"{num / 1_000:.1f}K"
        return str(num)

    def analyze(self, url, cookiefile=None):
        """Build the full text report for a YouTube URL, or an error string."""
        if not self.is_valid_youtube_url(url):
            return "β Invalid YouTube URL."
        # BUG FIX: remove .vtt files from earlier runs before downloading, so
        # the scan below cannot pick a previous video's subtitles.
        self._clear_stale_subs()
        info, raw_info = self.download_info_and_subs(url, cookiefile)
        if not info:
            return "β Subtitles not found for this video."
        vtt_path = None
        for file in os.listdir(self.temp_dir):
            if file.endswith(".vtt"):
                vtt_path = os.path.join(self.temp_dir, file)
                break
        if not vtt_path:
            return "β Subtitle file not found."
        captions = self.extract_captions(vtt_path)
        scene_block = self.generate_scene_descriptions(captions)
        # BUG FIX: yt-dlp may return keys that are present but None;
        # .get(key, 0) does not guard that, and None crashed the {value:,}
        # formatting below. Coerce with `or 0` / `or ''` instead.
        duration = int(raw_info.get('duration') or 0)
        duration_str = f"{duration//3600}:{(duration%3600)//60:02d}:{duration%60:02d}" if duration else "Unknown"
        upload_date = raw_info.get('upload_date') or ''
        formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}" if len(upload_date) == 8 else "Unknown"
        view_count = raw_info.get('view_count') or 0
        like_count = raw_info.get('like_count') or 0
        dislike_count = raw_info.get('dislike_count') or 0
        comment_count = raw_info.get('comment_count') or 0
        subscriber_count = raw_info.get('channel_followers') or 0
        engagement_rate = (like_count / view_count) * 100 if view_count else 0
        like_ratio = (like_count / (like_count + dislike_count)) * 100 if (like_count + dislike_count) else 0
        comment_ratio = (comment_count / view_count) * 100 if view_count else 0
        influencer_status = self.detect_influencer_status(raw_info)
        summary = f"""
π BASIC INFORMATION
{'β'*30}
πΉ **Title:** {raw_info.get('title', 'Unknown')}
πΊ **Channel:** {raw_info.get('channel', 'Unknown')}
π€ **Uploader:** {raw_info.get('uploader', 'Unknown')}
π
**Upload Date:** {formatted_date}
β±οΈ **Duration:** {duration_str}
π **Video ID:** {raw_info.get('id', 'Unknown')}
π **Video URL:** {raw_info.get('webpage_url', 'Unknown')}
π PERFORMANCE METRICS
{'β'*30}
π **Views:** {self.format_number(view_count)} ({view_count:,} exact)
π **Likes:** {self.format_number(like_count)} ({like_count:,} exact)
π **Dislikes:** {self.format_number(dislike_count)} ({dislike_count:,} exact)
π¬ **Comments:** {self.format_number(comment_count)} ({comment_count:,} exact)
π₯ **Subscribers:** {self.format_number(subscriber_count)} ({subscriber_count:,} exact)
π **Engagement Rate:** {engagement_rate:.2f}% (likes/views)
β€οΈ **Like Ratio:** {like_ratio:.1f}% (likes vs total reactions)
π **Comment Ratio:** {comment_ratio:.3f}% (comments/views)
π INFLUENCER STATUS
{'β'*30}
{influencer_status}
π¬ SCENE-BY-SCENE BREAKDOWN
{'β'*30}
{scene_block}
""".strip()
        return summary

    def download_video(self, url, quality="best", audio_only=False, cookiefile=None):
        """Download the video (or audio only) into downloads_dir; return its path.

        quality: 'best', '720p' or '480p' — the UI dropdown values.
        """
        # BUG FIX: `quality` was accepted but ignored (format was always
        # 'best'). Map the dropdown choices to yt-dlp height-capped selectors.
        quality_formats = {
            'best': 'best',
            '720p': 'best[height<=720]',
            '480p': 'best[height<=480]',
        }
        ydl_opts = {
            'outtmpl': os.path.join(self.downloads_dir, '%(title)s.%(ext)s'),
            'format': 'bestaudio/best' if audio_only else quality_formats.get(quality, 'best'),
            'noplaylist': True
        }
        if cookiefile:
            ydl_opts['cookiefile'] = cookiefile
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            filename = ydl.prepare_filename(info)
            return filename
# Single shared analyzer instance used by the Gradio callbacks below.
analyzer = VideoAnalyzer()
def analyze_video(url, cookies_file):
    """Gradio callback: run the full analysis for `url` and return the report.

    cookies_file comes from a gr.File input; depending on the gradio version
    it is either a tempfile-like object (with .name) or a plain path string.
    Accept both instead of assuming `.name` exists.
    """
    cookiefile = getattr(cookies_file, "name", cookies_file) if cookies_file else None
    return analyzer.analyze(url, cookiefile)
def download_video(url, quality, audio_only, cookies_file):
    """Gradio callback: download `url` and return the saved file path.

    cookies_file comes from a gr.File input; depending on the gradio version
    it is either a tempfile-like object (with .name) or a plain path string.
    Accept both instead of assuming `.name` exists.
    """
    cookiefile = getattr(cookies_file, "name", cookies_file) if cookies_file else None
    return analyzer.download_video(url, quality, audio_only, cookiefile)
# Gradio UI: one shared URL/cookies input, plus an Analyze tab and a
# Download tab wired to the callbacks above.
with gr.Blocks(title="YouTube Analyzer + Downloader") as iface:
    gr.Markdown("# π¬ YouTube Video Analyzer + Downloader")
    url = gr.Textbox(label="YouTube URL")
    # NOTE(review): type="file" is removed in gradio 4.x ("filepath" replaces
    # it); kept for compatibility with the gradio version this was written for.
    cookies = gr.File(label="Upload cookies.txt (Optional)", file_types=[".txt"], type="file")
    with gr.Tab("π Analyze Video"):
        analyze_btn = gr.Button("Analyze")
        analysis_output = gr.Textbox(label="Analysis Report", lines=40, show_copy_button=True)
        analyze_btn.click(fn=analyze_video, inputs=[url, cookies], outputs=analysis_output)
    with gr.Tab("β¬οΈ Download Video"):
        quality = gr.Dropdown(["best", "720p", "480p"], label="Quality", value="best")
        audio_only = gr.Checkbox(label="Audio Only", value=False)
        download_btn = gr.Button("Download")
        download_output = gr.Textbox(label="Download Status")

        def handle_download(url, quality, audio_only, cookies_file):
            """Run the download and report the saved path (or the failure)."""
            # Surface failures in the status textbox instead of an unhandled
            # exception in the gradio worker.
            try:
                path = download_video(url, quality, audio_only, cookies_file)
            except Exception as e:
                return f"Download failed: {e}"
            # BUG FIX: the original return statement's f-string literal was
            # split across two lines (unterminated string -> SyntaxError);
            # rejoined into a single message.
            return f"Downloaded to: {path}"

        download_btn.click(fn=handle_download, inputs=[url, quality, audio_only, cookies], outputs=download_output)
if __name__ == "__main__":
    # BUG FIX: removed a stray trailing "|" (copy/paste residue) that made
    # this line a syntax error. debug=True gives verbose errors in the UI.
    iface.launch(debug=True)