Spaces:

developer28
/

Youtubedownloader

Sleeping

App Files Community

developer28 committed 30 days ago

Commit

4fde749

verified ·

1 Parent(s): 5be84c5

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -167

app.py CHANGED Viewed

@@ -1,196 +1,137 @@
-import os
 import gradio as gr
 import yt_dlp
-import webvtt
-import google.generativeai as genai
 from datetime import datetime
-genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))  # Set your Gemini API key in environment
-MODEL_ID = "gemini-1.5-flash-latest"
-model = genai.GenerativeModel(model_name=MODEL_ID)
-class VideoAnalyzer:
     def __init__(self):
-        self.temp_dir = "temp_subs"
-        os.makedirs(self.temp_dir, exist_ok=True)
-        self.downloads_dir = os.path.join(os.path.expanduser("~"), "Downloads", "YT_Reports")
-        os.makedirs(self.downloads_dir, exist_ok=True)
     def is_valid_youtube_url(self, url):
-        return "youtube.com" in url or "youtu.be" in url
-    def download_info_and_subs(self, url, cookiefile=None):
-        ydl_opts = {
-            'writesubtitles': True,
-            'writeautomaticsub': True,
-            'subtitleslangs': ['en', 'en-HI', 'hi'],
-            'skip_download': True,
-            'outtmpl': os.path.join(self.temp_dir, 'video'),
-        }
-        if cookiefile:
-            ydl_opts['cookiefile'] = cookiefile
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=False)
-            subs = info.get('automatic_captions') or info.get('subtitles')
-            return info if subs else None, info
-    def extract_captions(self, vtt_path):
-        captions = []
-        for caption in webvtt.read(vtt_path):
-            text = caption.text.strip().replace("\n", " ")
-            captions.append(f"[{caption.start} - {caption.end}]: {text}")
-        return captions
-    def generate_scene_descriptions(self, captions):
-        prompt = """You are a visual scene narrator. Turn the following subtitles into timestamped scene descriptions.
-Use natural language and visual imagination. Avoid brand names.
-Example output:
-* **[0:00-0:01]:** A young woman with dark hair, wearing a red shirt, sits at a desk in a modern office...
-Now convert these subtitles:
-"""
-        prompt += "\n".join(captions[:20])  # Limit to first 20 captions
-        response = model.generate_content(prompt)
-        return response.text
-    def detect_influencer_status(self, info):
-        prompt = f"""You are a media analyst. Based on this metadata, tell if this channel is a famous influencer, brand, or regular user.
-Channel: {info.get('channel', '')}
-Uploader: {info.get('uploader', '')}
-Subscribers: {info.get('channel_followers', 0)}
-Title: {info.get('title', '')}
-Description: {info.get('description', '')}
-Respond with 1 line like 'Famous Influencer', 'Verified Brand Channel', or 'Regular Content Creator'."""
-        result = model.generate_content(prompt)
-        return result.text.strip()
-    def format_number(self, num):
-        if not num: return "0"
-        if num >= 1_000_000_000: return f"{num / 1_000_000_000:.1f}B"
-        elif num >= 1_000_000: return f"{num / 1_000_000:.1f}M"
-        elif num >= 1_000: return f"{num / 1_000:.1f}K"
-        return str(num)
-    def analyze(self, url, cookiefile=None):
-        if not self.is_valid_youtube_url(url):
-            return "❌ Invalid YouTube URL."
-        info, raw_info = self.download_info_and_subs(url, cookiefile)
-        if not info:
-            return "❌ Subtitles not found for this video."
-        vtt_path = None
-        for file in os.listdir(self.temp_dir):
-            if file.endswith(".vtt"):
-                vtt_path = os.path.join(self.temp_dir, file)
-                break
-        if not vtt_path:
-            return "❌ Subtitle file not found."
-        captions = self.extract_captions(vtt_path)
-        scene_block = self.generate_scene_descriptions(captions)
-        duration = raw_info.get('duration', 0)
-        duration_str = f"{duration//3600}:{(duration%3600)//60:02d}:{duration%60:02d}" if duration else "Unknown"
-        upload_date = raw_info.get('upload_date', '')
-        formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}" if len(upload_date) == 8 else "Unknown"
-        view_count = raw_info.get('view_count', 0)
-        like_count = raw_info.get('like_count', 0)
-        dislike_count = raw_info.get('dislike_count', 0)
-        comment_count = raw_info.get('comment_count', 0)
-        subscriber_count = raw_info.get('channel_followers', 0)
-        engagement_rate = (like_count / view_count) * 100 if view_count else 0
-        like_ratio = (like_count / (like_count + dislike_count)) * 100 if (like_count + dislike_count) else 0
-        comment_ratio = (comment_count / view_count) * 100 if view_count else 0
-        influencer_status = self.detect_influencer_status(raw_info)
         summary = f"""
 📋 BASIC INFORMATION
 {'─'*30}
-📹 **Title:** {raw_info.get('title', 'Unknown')}
-📺 **Channel:** {raw_info.get('channel', 'Unknown')}
-👤 **Uploader:** {raw_info.get('uploader', 'Unknown')}
-📅 **Upload Date:** {formatted_date}
-⏱️ **Duration:** {duration_str}
-🆔 **Video ID:** {raw_info.get('id', 'Unknown')}
-🔗 **Video URL:** {raw_info.get('webpage_url', 'Unknown')}
-📊 PERFORMANCE METRICS
-{'─'*30}
-👀 **Views:** {self.format_number(view_count)} ({view_count:,} exact)
-👍 **Likes:** {self.format_number(like_count)} ({like_count:,} exact)
-👎 **Dislikes:** {self.format_number(dislike_count)} ({dislike_count:,} exact)
-💬 **Comments:** {self.format_number(comment_count)} ({comment_count:,} exact)
-👥 **Subscribers:** {self.format_number(subscriber_count)} ({subscriber_count:,} exact)
-📈 **Engagement Rate:** {engagement_rate:.2f}% (likes/views)
-❤️ **Like Ratio:** {like_ratio:.1f}% (likes vs total reactions)
-💭 **Comment Ratio:** {comment_ratio:.3f}% (comments/views)
-👑 INFLUENCER STATUS
-{'─'*30}
-{influencer_status}
 🎬 SCENE-BY-SCENE BREAKDOWN
 {'─'*30}
-{scene_block}
-        """.strip()
         return summary
-    def download_video(self, url, quality="best", audio_only=False, cookiefile=None):
-        ydl_opts = {
-            'outtmpl': os.path.join(self.downloads_dir, '%(title)s.%(ext)s'),
-            'format': 'bestaudio/best' if audio_only else 'best',
-            'noplaylist': True
-        }
-        if cookiefile:
-            ydl_opts['cookiefile'] = cookiefile
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=True)
-            filename = ydl.prepare_filename(info)
-            return filename
-analyzer = VideoAnalyzer()
-def analyze_video(url, cookies_file):
-    cookiefile = cookies_file.name if cookies_file else None
-    return analyzer.analyze(url, cookiefile)
-def download_video(url, quality, audio_only, cookies_file):
-    cookiefile = cookies_file.name if cookies_file else None
-    return analyzer.download_video(url, quality, audio_only, cookiefile)
-with gr.Blocks(title="YouTube Analyzer + Downloader") as iface:
-    gr.Markdown("# 🎬 YouTube Video Analyzer + Downloader")
-    url = gr.Textbox(label="YouTube URL")
-    cookies = gr.File(label="Upload cookies.txt (Optional)", file_types=[".txt"], type="filepath")
-    with gr.Tab("📊 Analyze Video"):
-        analyze_btn = gr.Button("Analyze")
-        analysis_output = gr.Textbox(label="Analysis Report", lines=40, show_copy_button=True)
-        analyze_btn.click(fn=analyze_video, inputs=[url, cookies], outputs=analysis_output)
-    with gr.Tab("⬇️ Download Video"):
-        quality = gr.Dropdown(["best", "720p", "480p"], label="Quality", value="best")
-        audio_only = gr.Checkbox(label="Audio Only", value=False)
-        download_btn = gr.Button("Download")
-        download_output = gr.Textbox(label="Download Status")
-        def handle_download(url, quality, audio_only, cookies_file):
-            path = download_video(url, quality, audio_only, cookies_file)
-            return f"✅ Downloaded to: {path}"
-        download_btn.click(fn=handle_download, inputs=[url, quality, audio_only, cookies], outputs=download_output)
 if __name__ == "__main__":
     iface.launch(debug=True)

 import gradio as gr
 import yt_dlp
+import os
+import tempfile
+import shutil
+from pathlib import Path
+import re
+import uuid
+import json
 from datetime import datetime
+import google.generativeai as genai
+# Configure Gemini
+genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
+class YouTubeDownloader:
     def __init__(self):
+        self.download_dir = tempfile.mkdtemp()
+        self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")
+        self.downloads_folder = os.path.join(os.path.expanduser("~"), "Downloads", "YouTube_Downloads")
+        os.makedirs(self.downloads_folder, exist_ok=True)
+    def cleanup(self):
+        try:
+            if hasattr(self, 'download_dir') and os.path.exists(self.download_dir):
+                shutil.rmtree(self.download_dir)
+            if hasattr(self, 'temp_downloads') and os.path.exists(self.temp_downloads):
+                shutil.rmtree(self.temp_downloads)
+        except Exception as e:
+            print(f"⚠️ Cleanup error: {e}")
     def is_valid_youtube_url(self, url):
+        youtube_regex = re.compile(
+            r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
+            r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
+        )
+        return youtube_regex.match(url) is not None
+    def generate_scene_breakdown_gemini(self, video_info):
+        title = video_info.get("title", "")
+        description = video_info.get("description", "")
+        duration = video_info.get("duration", 0)
+        if not duration:
+            return ["[Duration Unknown]: Unable to generate scene breakdown."]
+        prompt = f"""
+You are a scene breakdown expert. Based on the following video metadata, generate a detailed scene-by-scene breakdown using timestamps.
+Each scene should be 5–15 seconds long (estimate). Format should be:
+* **[0:00-0:10]:** A woman opens the door and looks outside.
+Use storytelling tone and avoid brand/product mentions. Do not invent company names.
+Video Title: {title}
+Description: {description}
+Duration (in seconds): {duration}
+Now generate the scene breakdown:
+"""
+        try:
+            response = model.generate_content(prompt)
+            if response.text:
+                return response.text.strip().split("\n")
+            else:
+                return ["[Error]: Gemini response was empty."]
+        except Exception as e:
+            return [f"[Error generating scenes with Gemini]: {str(e)}"]
+    def get_video_info(self, url):
+        if not self.is_valid_youtube_url(url):
+            return None, "❌ Invalid YouTube URL"
+        try:
+            ydl_opts = {'noplaylist': True, 'extract_flat': False}
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                info = ydl.extract_info(url, download=False)
+            return info, "✅ Video information extracted"
+        except Exception as e:
+            return None, f"❌ Error: {str(e)}"
+    def format_video_info(self, video_info):
+        title = video_info.get("title", "Unknown")
+        uploader = video_info.get("uploader", "Unknown")
+        duration = video_info.get("duration", 0)
+        duration_str = f"{duration//60}:{duration%60:02d}" if duration else "Unknown"
+        view_count = video_info.get("view_count", 0)
+        like_count = video_info.get("like_count", 0)
+        comment_count = video_info.get("comment_count", 0)
+        upload_date = video_info.get("upload_date", "Unknown")
+        formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:]}" if len(upload_date) == 8 else "Unknown"
+        scene_descriptions = self.generate_scene_breakdown_gemini(video_info)
+        def fmt(n):
+            if not n: return "0"
+            if n > 1_000_000: return f"{n/1_000_000:.1f}M"
+            if n > 1_000: return f"{n/1_000:.1f}K"
+            return str(n)
         summary = f"""
 📋 BASIC INFORMATION
 {'─'*30}
+📹 Title: {title}
+👤 Uploader: {uploader}
+📅 Upload Date: {formatted_date}
+⏱️ Duration: {duration_str}
+👀 Views: {fmt(view_count)}
+👍 Likes: {fmt(like_count)}
+💬 Comments: {fmt(comment_count)}
 🎬 SCENE-BY-SCENE BREAKDOWN
 {'─'*30}
+{chr(10).join(scene_descriptions)}
+"""
         return summary
+downloader = YouTubeDownloader()
+def analyze_video(url):
+    info, msg = downloader.get_video_info(url)
+    if not info:
+        return msg
+    return downloader.format_video_info(info)
+iface = gr.Interface(
+    fn=analyze_video,
+    inputs=gr.Textbox(label="YouTube URL"),
+    outputs=gr.Textbox(label="Gemini-Generated Scene Breakdown", lines=30, show_copy_button=True),
+    title="🎬 Gemini Scene Breakdown",
+    description="Generates scene-by-scene descriptions using Gemini Flash based on video metadata"
+)
 if __name__ == "__main__":
     iface.launch(debug=True)