Spaces:

developer28
/

Youtubedownloader

Sleeping

App Files Files Community

developer28 committed on Jun 18

Commit

39fa25b

verified ·

1 Parent(s): ca78672

Update app.py

Browse files

Files changed (1) hide show

app.py +187 -223

app.py CHANGED Viewed

@@ -1,232 +1,196 @@
 import gradio as gr
 import yt_dlp
-import os
-import tempfile
-import shutil
-from pathlib import Path
-import re
-import uuid
-session_data = {}
-class YouTubeDownloader:
     def __init__(self):
-        self.download_dir = tempfile.mkdtemp()
     def is_valid_youtube_url(self, url):
-        youtube_regex = re.compile(
-            r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
-            r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
-        )
-        return youtube_regex.match(url) is not None
-    def format_video_info(self, video_info):
-        """Format video information into a readable report"""
-        if not video_info:
-            return "❌ No video information available."
-        # Format duration
-        duration = video_info.get('duration', 0)
-        duration_str = f"{duration//3600}:{(duration%3600)//60:02d}:{duration%60:02d}" if duration else "Unknown"
-        # Format upload date
-        upload_date = video_info.get('upload_date', '')
-        if upload_date and len(upload_date) == 8:
-            formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"
-        else:
-            formatted_date = upload_date or "Unknown"
-        # Format numbers
-        def format_number(num):
-            if num >= 1_000_000:
-                return f"{num/1_000_000:.1f}M"
-            elif num >= 1_000:
-                return f"{num/1_000:.1f}K"
-            else:
-                return str(num)
-        # Build the report
-        report = f"""
-📹 VIDEO ANALYSIS REPORT
-{'='*50}
-📝 BASIC INFORMATION:
-• Title: {video_info.get('title', 'Unknown')}
-• Channel: {video_info.get('channel', 'Unknown')}
-• Uploader: {video_info.get('uploader', 'Unknown')}
-• Upload Date: {formatted_date}
-• Duration: {duration_str}
-📊 STATISTICS:
-• Views: {format_number(video_info.get('view_count', 0))}
-• Likes: {format_number(video_info.get('like_count', 0))}
-• Comments: {format_number(video_info.get('comment_count', 0))}
-• Channel Followers: {format_number(video_info.get('channel_followers', 0))}
-🏷️ CATEGORIES & TAGS:
-• Categories: {', '.join(video_info.get('categories', [])) or 'None'}
-• Tags: {', '.join(video_info.get('tags', [])[:10]) or 'None'}
-{('• More tags...' if len(video_info.get('tags', [])) > 10 else '')}
-📖 DESCRIPTION:
-{video_info.get('description', 'No description available')[:500]}
-{'...' if len(video_info.get('description', '')) > 500 else ''}
-🔗 VIDEO URL:
-{video_info.get('webpage_url', 'Unknown')}
-        """
-        return report.strip()
-    def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
-        if not url or not url.strip():
-            return None, "❌ Please enter a YouTube URL"
         if not self.is_valid_youtube_url(url):
-            return None, "❌ Invalid YouTube URL. Please enter a valid YouTube video URL"
-        try:
-            progress(0.2, desc="Fetching video information...")
-            ydl_opts = {
-                'noplaylist': True,
-                'extract_flat': False,
-            }
-            if cookiefile:
-                ydl_opts['cookiefile'] = cookiefile
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                try:
-                    info = ydl.extract_info(url, download=False)
-                    video_info = {
-                        'title': info.get('title', 'Unknown'),
-                        'description': info.get('description', 'No description available'),
-                        'duration': info.get('duration', 0),
-                        'view_count': info.get('view_count', 0),
-                        'like_count': info.get('like_count', 0),
-                        'comment_count': info.get('comment_count', 0),
-                        'upload_date': info.get('upload_date', ''),
-                        'uploader': info.get('uploader', 'Unknown'),
-                        'channel': info.get('channel', 'Unknown'),
-                        'channel_followers': info.get('channel_follower_count', 0),
-                        'tags': info.get('tags', []),
-                        'categories': info.get('categories', []),
-                        'thumbnail': info.get('thumbnail', ''),
-                        'webpage_url': info.get('webpage_url', url)
-                    }
-                    progress(1.0, desc="Information retrieved!")
-                    return video_info, "✅ Video information retrieved successfully"
-                except yt_dlp.DownloadError as e:
-                    error_msg = str(e)
-                    if "Video unavailable" in error_msg:
-                        return None, "❌ Video is unavailable or private"
-                    elif "age-restricted" in error_msg.lower():
-                        return None, "❌ Video is age-restricted"
-                    else:
-                        return None, f"❌ Failed to get video info: {error_msg}"
-        except Exception as e:
-            return None, f"❌ An unexpected error occurred: {str(e)}"
-    def download_video(self, url, progress=gr.Progress(), cookiefile=None):
-        if not url or not url.strip():
-            return None, "❌ Please enter a YouTube URL"
-        if not self.is_valid_youtube_url(url):
-            return None, "❌ Invalid YouTube URL. Please enter a valid YouTube video URL"
-        try:
-            progress(0.1, desc="Initializing download...")
-            ydl_opts = {
-                'format': 'best[ext=mp4]/best',
-                'outtmpl': os.path.join(self.download_dir, '%(title)s.%(ext)s'),
-                'noplaylist': True,
-            }
-            if cookiefile:
-                ydl_opts['cookiefile'] = cookiefile
-            progress(0.3, desc="Fetching video information...")
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                try:
-                    info = ydl.extract_info(url, download=False)
-                    video_title = info.get('title', 'Unknown')
-                    duration = info.get('duration', 0)
-                    progress(0.5, desc=f"Downloading: {video_title[:50]}...")
-                    ydl.download([url])
-                    progress(0.9, desc="Finalizing download...")
-                    downloaded_files = list(Path(self.download_dir).glob('*'))
-                    if downloaded_files:
-                        downloaded_file = downloaded_files[0]
-                        file_size = downloaded_file.stat().st_size / (1024 * 1024)
-                        progress(1.0, desc="Download completed!")
-                        success_message = f"✅ Successfully downloaded: {video_title}\n"
-                        success_message += f"📁 File size: {file_size:.1f} MB\n"
-                        success_message += f"⏱️ Duration: {duration//60}:{duration%60:02d}" if duration else ""
-                        return str(downloaded_file), success_message
-                    else:
-                        return None, "❌ Download completed but file not found"
-                except yt_dlp.DownloadError as e:
-                    error_msg = str(e)
-                    if "Video unavailable" in error_msg:
-                        return None, "❌ Video is unavailable or private"
-                    elif "age-restricted" in error_msg.lower():
-                        return None, "❌ Video is age-restricted and cannot be downloaded"
-                    elif "copyright" in error_msg.lower():
-                        return None, "❌ Video cannot be downloaded due to copyright restrictions"
-                    else:
-                        return None, f"❌ Download failed: {error_msg}"
-        except Exception as e:
-            return None, f"❌ An unexpected error occurred: {str(e)}"
-    def cleanup(self):
-        try:
-            shutil.rmtree(self.download_dir, ignore_errors=True)
-        except:
-            pass
-downloader = YouTubeDownloader()
-def create_interface():
-    with gr.Blocks(title="YouTube Video Downloader & Analyzer") as demo:
-        url_input = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
-        cookies_input = gr.File(label="Upload cookies.txt (optional)", type="filepath", file_types=[".txt"])
-        download_btn = gr.Button("Download Video")
-        status_output = gr.Textbox(label="Download Status")
-        file_output = gr.File(label="Downloaded Video", visible=False)
-        analysis_btn = gr.Button("Show Analysis Results", visible=False)
-        analysis_output = gr.Textbox(label="Video Analysis Results", visible=False, lines=20)
-        video_info_state = gr.Textbox(value=False)
-        def handle_download(url, cookies_path):
-            if not url or not url.strip():
-                return "Please enter a YouTube URL", gr.File(visible=False), gr.Button(visible=False), None
-            cookiefile = cookies_path.strip() if cookies_path and os.path.exists(cookies_path.strip()) else None
-            video_info, info_message = downloader.get_video_info(url, cookiefile=cookiefile)
-            file_path, download_message = downloader.download_video(url, cookiefile=cookiefile)
-            print(f"[DEBUG] Download message: {download_message}")
-            if file_path and video_info:
-                success_message = f"{download_message}\n\nVideo downloaded successfully! Click 'Show Analysis Results' to see detailed information."
-                return success_message, gr.File(value=file_path, visible=True), gr.Button(visible=True), gr.State(video_info)
-            elif file_path:
-                return f"{download_message}\n\nVideo downloaded but analysis data unavailable.", gr.File(value=file_path, visible=True), gr.Button(visible=False), gr.State("")
-            else:
-                return f"❌ Download failed:\n{download_message}", gr.File(visible=False), gr.Button(visible=False), gr.State(None)
-        def show_analysis(video_info):
-            print("[DEBUG] Received session_id:", session_id)
-            video_info = session_data.get(session_id)
-            if video_info:
-                return downloader.format_video_info(video_info), gr.Textbox(visible=True)
-            return "❌ No analysis data available.", gr.Textbox(visible=True)
-        download_btn.click(handle_download, inputs=[url_input, cookies_input], outputs=[status_output, file_output, analysis_btn, video_info_state])
-        analysis_btn.click(show_analysis, inputs=[video_info_state], outputs=[analysis_output])
-        url_input.submit(handle_download, inputs=[url_input, cookies_input], outputs=[status_output, file_output, analysis_btn, video_info_state])
-    return demo
 if __name__ == "__main__":
-    demo = create_interface()
-    import atexit
-    atexit.register(downloader.cleanup)
-    demo.launch()

+import os
 import gradio as gr
 import yt_dlp
+import webvtt
+import google.generativeai as genai
+from datetime import datetime
+# Configure the Gemini client at import time; the key is read from the
+# GOOGLE_API_KEY environment variable.
+genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))  # Set your Gemini API key in environment
+MODEL_ID = "gemini-1.5-flash-latest"
+# Shared model handle; VideoAnalyzer sends both of its prompts through it.
+model = genai.GenerativeModel(model_name=MODEL_ID)
+class VideoAnalyzer:
     def __init__(self):
+        self.temp_dir = "temp_subs"
+        os.makedirs(self.temp_dir, exist_ok=True)
+        self.downloads_dir = os.path.join(os.path.expanduser("~"), "Downloads", "YT_Reports")
+        os.makedirs(self.downloads_dir, exist_ok=True)
     def is_valid_youtube_url(self, url):
+        return "youtube.com" in url or "youtu.be" in url
+    def download_info_and_subs(self, url, cookiefile=None):
+        ydl_opts = {
+            'writesubtitles': True,
+            'writeautomaticsub': True,
+            'subtitleslangs': ['en', 'en-HI', 'hi'],
+            'skip_download': True,
+            'outtmpl': os.path.join(self.temp_dir, 'video'),
+        }
+        if cookiefile:
+            ydl_opts['cookiefile'] = cookiefile
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(url, download=False)
+            subs = info.get('automatic_captions') or info.get('subtitles')
+            return info if subs else None, info
+    def extract_captions(self, vtt_path):
+        captions = []
+        for caption in webvtt.read(vtt_path):
+            text = caption.text.strip().replace("\n", " ")
+            captions.append(f"[{caption.start} - {caption.end}]: {text}")
+        return captions
+    def generate_scene_descriptions(self, captions):
+        prompt = """You are a visual scene narrator. Turn the following subtitles into timestamped scene descriptions.
+Use natural language and visual imagination. Avoid brand names.
+Example output:
+* **[0:00-0:01]:** A young woman with dark hair, wearing a red shirt, sits at a desk in a modern office...
+Now convert these subtitles:
+"""
+        prompt += "\n".join(captions[:20])  # Limit to first 20 captions
+        response = model.generate_content(prompt)
+        return response.text
+    def detect_influencer_status(self, info):
+        prompt = f"""You are a media analyst. Based on this metadata, tell if this channel is a famous influencer, brand, or regular user.
+Channel: {info.get('channel', '')}
+Uploader: {info.get('uploader', '')}
+Subscribers: {info.get('channel_followers', 0)}
+Title: {info.get('title', '')}
+Description: {info.get('description', '')}
+Respond with 1 line like 'Famous Influencer', 'Verified Brand Channel', or 'Regular Content Creator'."""
+        result = model.generate_content(prompt)
+        return result.text.strip()
+    def format_number(self, num):
+        if not num: return "0"
+        if num >= 1_000_000_000: return f"{num / 1_000_000_000:.1f}B"
+        elif num >= 1_000_000: return f"{num / 1_000_000:.1f}M"
+        elif num >= 1_000: return f"{num / 1_000:.1f}K"
+        return str(num)
+    def analyze(self, url, cookiefile=None):
         if not self.is_valid_youtube_url(url):
+            return "❌ Invalid YouTube URL."
+        info, raw_info = self.download_info_and_subs(url, cookiefile)
+        if not info:
+            return "❌ Subtitles not found for this video."
+        vtt_path = None
+        for file in os.listdir(self.temp_dir):
+            if file.endswith(".vtt"):
+                vtt_path = os.path.join(self.temp_dir, file)
+                break
+        if not vtt_path:
+            return "❌ Subtitle file not found."
+        captions = self.extract_captions(vtt_path)
+        scene_block = self.generate_scene_descriptions(captions)
+        duration = raw_info.get('duration', 0)
+        duration_str = f"{duration//3600}:{(duration%3600)//60:02d}:{duration%60:02d}" if duration else "Unknown"
+        upload_date = raw_info.get('upload_date', '')
+        formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}" if len(upload_date) == 8 else "Unknown"
+        view_count = raw_info.get('view_count', 0)
+        like_count = raw_info.get('like_count', 0)
+        dislike_count = raw_info.get('dislike_count', 0)
+        comment_count = raw_info.get('comment_count', 0)
+        subscriber_count = raw_info.get('channel_followers', 0)
+        engagement_rate = (like_count / view_count) * 100 if view_count else 0
+        like_ratio = (like_count / (like_count + dislike_count)) * 100 if (like_count + dislike_count) else 0
+        comment_ratio = (comment_count / view_count) * 100 if view_count else 0
+        influencer_status = self.detect_influencer_status(raw_info)
+        summary = f"""
+📋 BASIC INFORMATION
+{'─'*30}
+📹 **Title:** {raw_info.get('title', 'Unknown')}
+📺 **Channel:** {raw_info.get('channel', 'Unknown')}
+👤 **Uploader:** {raw_info.get('uploader', 'Unknown')}
+📅 **Upload Date:** {formatted_date}
+⏱️ **Duration:** {duration_str}
+🆔 **Video ID:** {raw_info.get('id', 'Unknown')}
+🔗 **Video URL:** {raw_info.get('webpage_url', 'Unknown')}
+📊 PERFORMANCE METRICS
+{'─'*30}
+👀 **Views:** {self.format_number(view_count)} ({view_count:,} exact)
+👍 **Likes:** {self.format_number(like_count)} ({like_count:,} exact)
+👎 **Dislikes:** {self.format_number(dislike_count)} ({dislike_count:,} exact)
+💬 **Comments:** {self.format_number(comment_count)} ({comment_count:,} exact)
+👥 **Subscribers:** {self.format_number(subscriber_count)} ({subscriber_count:,} exact)
+📈 **Engagement Rate:** {engagement_rate:.2f}% (likes/views)
+❤️ **Like Ratio:** {like_ratio:.1f}% (likes vs total reactions)
+💭 **Comment Ratio:** {comment_ratio:.3f}% (comments/views)
+👑 INFLUENCER STATUS
+{'─'*30}
+{influencer_status}
+🎬 SCENE-BY-SCENE BREAKDOWN
+{'─'*30}
+{scene_block}
+        """.strip()
+        return summary
+    def download_video(self, url, quality="best", audio_only=False, cookiefile=None):
+        ydl_opts = {
+            'outtmpl': os.path.join(self.downloads_dir, '%(title)s.%(ext)s'),
+            'format': 'bestaudio/best' if audio_only else 'best',
+            'noplaylist': True
+        }
+        if cookiefile:
+            ydl_opts['cookiefile'] = cookiefile
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(url, download=True)
+            filename = ydl.prepare_filename(info)
+            return filename
+# Single shared analyzer used by the Gradio callbacks below; constructing it
+# creates the subtitle and download directories.
+analyzer = VideoAnalyzer()
+def analyze_video(url, cookies_file):
+    cookiefile = cookies_file.name if cookies_file else None
+    return analyzer.analyze(url, cookiefile)
+def download_video(url, quality, audio_only, cookies_file):
+    cookiefile = cookies_file.name if cookies_file else None
+    return analyzer.download_video(url, quality, audio_only, cookiefile)
+with gr.Blocks(title="YouTube Analyzer + Downloader") as iface:
+    gr.Markdown("# 🎬 YouTube Video Analyzer + Downloader")
+    url = gr.Textbox(label="YouTube URL")
+    cookies = gr.File(label="Upload cookies.txt (Optional)", file_types=[".txt"], type="file")
+    with gr.Tab("📊 Analyze Video"):
+        analyze_btn = gr.Button("Analyze")
+        analysis_output = gr.Textbox(label="Analysis Report", lines=40, show_copy_button=True)
+        analyze_btn.click(fn=analyze_video, inputs=[url, cookies], outputs=analysis_output)
+    with gr.Tab("⬇️ Download Video"):
+        quality = gr.Dropdown(["best", "720p", "480p"], label="Quality", value="best")
+        audio_only = gr.Checkbox(label="Audio Only", value=False)
+        download_btn = gr.Button("Download")
+        download_output = gr.Textbox(label="Download Status")
+        def handle_download(url, quality, audio_only, cookies_file):
+            path = download_video(url, quality, audio_only, cookies_file)
+            return f"✅ Downloaded to: {path}"
+        download_btn.click(fn=handle_download, inputs=[url, quality, audio_only, cookies], outputs=download_output)
 if __name__ == "__main__":
+    # Start the Gradio app only when the file is run as a script.
+    iface.launch(debug=True)