developer28 committed on
Commit
4fde749
·
verified ·
1 Parent(s): 5be84c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -167
app.py CHANGED
@@ -1,196 +1,137 @@
1
- import os
2
  import gradio as gr
3
  import yt_dlp
4
- import webvtt
5
- import google.generativeai as genai
 
 
 
 
 
6
  from datetime import datetime
 
7
 
8
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) # Set your Gemini API key in environment
9
- MODEL_ID = "gemini-1.5-flash-latest"
10
- model = genai.GenerativeModel(model_name=MODEL_ID)
11
 
12
- class VideoAnalyzer:
13
  def __init__(self):
14
- self.temp_dir = "temp_subs"
15
- os.makedirs(self.temp_dir, exist_ok=True)
16
- self.downloads_dir = os.path.join(os.path.expanduser("~"), "Downloads", "YT_Reports")
17
- os.makedirs(self.downloads_dir, exist_ok=True)
 
 
 
 
 
 
 
 
 
18
 
19
  def is_valid_youtube_url(self, url):
20
- return "youtube.com" in url or "youtu.be" in url
21
-
22
- def download_info_and_subs(self, url, cookiefile=None):
23
- ydl_opts = {
24
- 'writesubtitles': True,
25
- 'writeautomaticsub': True,
26
- 'subtitleslangs': ['en', 'en-HI', 'hi'],
27
- 'skip_download': True,
28
- 'outtmpl': os.path.join(self.temp_dir, 'video'),
29
- }
30
- if cookiefile:
31
- ydl_opts['cookiefile'] = cookiefile
32
-
33
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
34
- info = ydl.extract_info(url, download=False)
35
- subs = info.get('automatic_captions') or info.get('subtitles')
36
- return info if subs else None, info
37
-
38
- def extract_captions(self, vtt_path):
39
- captions = []
40
- for caption in webvtt.read(vtt_path):
41
- text = caption.text.strip().replace("\n", " ")
42
- captions.append(f"[{caption.start} - {caption.end}]: {text}")
43
- return captions
44
-
45
- def generate_scene_descriptions(self, captions):
46
- prompt = """You are a visual scene narrator. Turn the following subtitles into timestamped scene descriptions.
47
- Use natural language and visual imagination. Avoid brand names.
48
-
49
- Example output:
50
- * **[0:00-0:01]:** A young woman with dark hair, wearing a red shirt, sits at a desk in a modern office...
51
-
52
- Now convert these subtitles:
53
 
54
- """
55
- prompt += "\n".join(captions[:20]) # Limit to first 20 captions
 
 
56
 
57
- response = model.generate_content(prompt)
58
- return response.text
59
 
60
- def detect_influencer_status(self, info):
61
- prompt = f"""You are a media analyst. Based on this metadata, tell if this channel is a famous influencer, brand, or regular user.
 
62
 
63
- Channel: {info.get('channel', '')}
64
- Uploader: {info.get('uploader', '')}
65
- Subscribers: {info.get('channel_followers', 0)}
66
- Title: {info.get('title', '')}
67
- Description: {info.get('description', '')}
68
-
69
- Respond with 1 line like 'Famous Influencer', 'Verified Brand Channel', or 'Regular Content Creator'."""
70
- result = model.generate_content(prompt)
71
- return result.text.strip()
72
-
73
- def format_number(self, num):
74
- if not num: return "0"
75
- if num >= 1_000_000_000: return f"{num / 1_000_000_000:.1f}B"
76
- elif num >= 1_000_000: return f"{num / 1_000_000:.1f}M"
77
- elif num >= 1_000: return f"{num / 1_000:.1f}K"
78
- return str(num)
79
-
80
- def analyze(self, url, cookiefile=None):
81
- if not self.is_valid_youtube_url(url):
82
- return "❌ Invalid YouTube URL."
83
 
84
- info, raw_info = self.download_info_and_subs(url, cookiefile)
85
- if not info:
86
- return "❌ Subtitles not found for this video."
87
 
88
- vtt_path = None
89
- for file in os.listdir(self.temp_dir):
90
- if file.endswith(".vtt"):
91
- vtt_path = os.path.join(self.temp_dir, file)
92
- break
93
 
94
- if not vtt_path:
95
- return "❌ Subtitle file not found."
96
-
97
- captions = self.extract_captions(vtt_path)
98
- scene_block = self.generate_scene_descriptions(captions)
99
-
100
- duration = raw_info.get('duration', 0)
101
- duration_str = f"{duration//3600}:{(duration%3600)//60:02d}:{duration%60:02d}" if duration else "Unknown"
102
- upload_date = raw_info.get('upload_date', '')
103
- formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}" if len(upload_date) == 8 else "Unknown"
104
 
105
- view_count = raw_info.get('view_count', 0)
106
- like_count = raw_info.get('like_count', 0)
107
- dislike_count = raw_info.get('dislike_count', 0)
108
- comment_count = raw_info.get('comment_count', 0)
109
- subscriber_count = raw_info.get('channel_followers', 0)
110
- engagement_rate = (like_count / view_count) * 100 if view_count else 0
111
- like_ratio = (like_count / (like_count + dislike_count)) * 100 if (like_count + dislike_count) else 0
112
- comment_ratio = (comment_count / view_count) * 100 if view_count else 0
113
 
114
- influencer_status = self.detect_influencer_status(raw_info)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  summary = f"""
117
  📋 BASIC INFORMATION
118
  {'─'*30}
119
- 📹 **Title:** {raw_info.get('title', 'Unknown')}
120
- 📺 **Channel:** {raw_info.get('channel', 'Unknown')}
121
- 👤 **Uploader:** {raw_info.get('uploader', 'Unknown')}
122
- 📅 **Upload Date:** {formatted_date}
123
- ⏱️ **Duration:** {duration_str}
124
- 🆔 **Video ID:** {raw_info.get('id', 'Unknown')}
125
- 🔗 **Video URL:** {raw_info.get('webpage_url', 'Unknown')}
126
-
127
- 📊 PERFORMANCE METRICS
128
- {'─'*30}
129
- 👀 **Views:** {self.format_number(view_count)} ({view_count:,} exact)
130
- 👍 **Likes:** {self.format_number(like_count)} ({like_count:,} exact)
131
- 👎 **Dislikes:** {self.format_number(dislike_count)} ({dislike_count:,} exact)
132
- 💬 **Comments:** {self.format_number(comment_count)} ({comment_count:,} exact)
133
- 👥 **Subscribers:** {self.format_number(subscriber_count)} ({subscriber_count:,} exact)
134
- 📈 **Engagement Rate:** {engagement_rate:.2f}% (likes/views)
135
- ❤️ **Like Ratio:** {like_ratio:.1f}% (likes vs total reactions)
136
- 💭 **Comment Ratio:** {comment_ratio:.3f}% (comments/views)
137
-
138
- 👑 INFLUENCER STATUS
139
- {'─'*30}
140
- {influencer_status}
141
 
142
  🎬 SCENE-BY-SCENE BREAKDOWN
143
  {'─'*30}
144
- {scene_block}
145
- """.strip()
146
-
147
  return summary
148
 
149
- def download_video(self, url, quality="best", audio_only=False, cookiefile=None):
150
- ydl_opts = {
151
- 'outtmpl': os.path.join(self.downloads_dir, '%(title)s.%(ext)s'),
152
- 'format': 'bestaudio/best' if audio_only else 'best',
153
- 'noplaylist': True
154
- }
155
- if cookiefile:
156
- ydl_opts['cookiefile'] = cookiefile
157
-
158
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
159
- info = ydl.extract_info(url, download=True)
160
- filename = ydl.prepare_filename(info)
161
- return filename
162
-
163
- analyzer = VideoAnalyzer()
164
-
165
- def analyze_video(url, cookies_file):
166
- cookiefile = cookies_file.name if cookies_file else None
167
- return analyzer.analyze(url, cookiefile)
168
-
169
- def download_video(url, quality, audio_only, cookies_file):
170
- cookiefile = cookies_file.name if cookies_file else None
171
- return analyzer.download_video(url, quality, audio_only, cookiefile)
172
-
173
- with gr.Blocks(title="YouTube Analyzer + Downloader") as iface:
174
- gr.Markdown("# 🎬 YouTube Video Analyzer + Downloader")
175
- url = gr.Textbox(label="YouTube URL")
176
- cookies = gr.File(label="Upload cookies.txt (Optional)", file_types=[".txt"], type="filepath")
177
-
178
- with gr.Tab("📊 Analyze Video"):
179
- analyze_btn = gr.Button("Analyze")
180
- analysis_output = gr.Textbox(label="Analysis Report", lines=40, show_copy_button=True)
181
- analyze_btn.click(fn=analyze_video, inputs=[url, cookies], outputs=analysis_output)
182
-
183
- with gr.Tab("⬇️ Download Video"):
184
- quality = gr.Dropdown(["best", "720p", "480p"], label="Quality", value="best")
185
- audio_only = gr.Checkbox(label="Audio Only", value=False)
186
- download_btn = gr.Button("Download")
187
- download_output = gr.Textbox(label="Download Status")
188
-
189
- def handle_download(url, quality, audio_only, cookies_file):
190
- path = download_video(url, quality, audio_only, cookies_file)
191
- return f"✅ Downloaded to: {path}"
192
-
193
- download_btn.click(fn=handle_download, inputs=[url, quality, audio_only, cookies], outputs=download_output)
194
 
195
  if __name__ == "__main__":
196
  iface.launch(debug=True)
 
 
1
  import gradio as gr
2
  import yt_dlp
3
+ import os
4
+ import tempfile
5
+ import shutil
6
+ from pathlib import Path
7
+ import re
8
+ import uuid
9
+ import json
10
  from datetime import datetime
11
+ import google.generativeai as genai
12
 
13
+ # Configure Gemini
14
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
15
+ model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
16
 
17
+ class YouTubeDownloader:
18
  def __init__(self):
19
+ self.download_dir = tempfile.mkdtemp()
20
+ self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")
21
+ self.downloads_folder = os.path.join(os.path.expanduser("~"), "Downloads", "YouTube_Downloads")
22
+ os.makedirs(self.downloads_folder, exist_ok=True)
23
+
24
+ def cleanup(self):
25
+ try:
26
+ if hasattr(self, 'download_dir') and os.path.exists(self.download_dir):
27
+ shutil.rmtree(self.download_dir)
28
+ if hasattr(self, 'temp_downloads') and os.path.exists(self.temp_downloads):
29
+ shutil.rmtree(self.temp_downloads)
30
+ except Exception as e:
31
+ print(f"⚠️ Cleanup error: {e}")
32
 
33
  def is_valid_youtube_url(self, url):
34
+ youtube_regex = re.compile(
35
+ r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
36
+ r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
37
+ )
38
+ return youtube_regex.match(url) is not None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
+ def generate_scene_breakdown_gemini(self, video_info):
41
+ title = video_info.get("title", "")
42
+ description = video_info.get("description", "")
43
+ duration = video_info.get("duration", 0)
44
 
45
+ if not duration:
46
+ return ["[Duration Unknown]: Unable to generate scene breakdown."]
47
 
48
+ prompt = f"""
49
+ You are a scene breakdown expert. Based on the following video metadata, generate a detailed scene-by-scene breakdown using timestamps.
50
+ Each scene should be 5–15 seconds long (estimate). Format should be:
51
 
52
+ * **[0:00-0:10]:** A woman opens the door and looks outside.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ Use storytelling tone and avoid brand/product mentions. Do not invent company names.
 
 
55
 
56
+ Video Title: {title}
57
+ Description: {description}
58
+ Duration (in seconds): {duration}
 
 
59
 
60
+ Now generate the scene breakdown:
61
+ """
 
 
 
 
 
 
 
 
62
 
63
+ try:
64
+ response = model.generate_content(prompt)
65
+ if response.text:
66
+ return response.text.strip().split("\n")
67
+ else:
68
+ return ["[Error]: Gemini response was empty."]
69
+ except Exception as e:
70
+ return [f"[Error generating scenes with Gemini]: {str(e)}"]
71
 
72
+ def get_video_info(self, url):
73
+ if not self.is_valid_youtube_url(url):
74
+ return None, "❌ Invalid YouTube URL"
75
+
76
+ try:
77
+ ydl_opts = {'noplaylist': True, 'extract_flat': False}
78
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
79
+ info = ydl.extract_info(url, download=False)
80
+ return info, "✅ Video information extracted"
81
+ except Exception as e:
82
+ return None, f"❌ Error: {str(e)}"
83
+
84
+ def format_video_info(self, video_info):
85
+ title = video_info.get("title", "Unknown")
86
+ uploader = video_info.get("uploader", "Unknown")
87
+ duration = video_info.get("duration", 0)
88
+ duration_str = f"{duration//60}:{duration%60:02d}" if duration else "Unknown"
89
+ view_count = video_info.get("view_count", 0)
90
+ like_count = video_info.get("like_count", 0)
91
+ comment_count = video_info.get("comment_count", 0)
92
+ upload_date = video_info.get("upload_date", "Unknown")
93
+ formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:]}" if len(upload_date) == 8 else "Unknown"
94
+
95
+ scene_descriptions = self.generate_scene_breakdown_gemini(video_info)
96
+
97
+ def fmt(n):
98
+ if not n: return "0"
99
+ if n > 1_000_000: return f"{n/1_000_000:.1f}M"
100
+ if n > 1_000: return f"{n/1_000:.1f}K"
101
+ return str(n)
102
 
103
  summary = f"""
104
  📋 BASIC INFORMATION
105
  {'─'*30}
106
+ 📹 Title: {title}
107
+ 👤 Uploader: {uploader}
108
+ 📅 Upload Date: {formatted_date}
109
+ ⏱️ Duration: {duration_str}
110
+ 👀 Views: {fmt(view_count)}
111
+ 👍 Likes: {fmt(like_count)}
112
+ 💬 Comments: {fmt(comment_count)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  🎬 SCENE-BY-SCENE BREAKDOWN
115
  {'─'*30}
116
+ {chr(10).join(scene_descriptions)}
117
+ """
 
118
  return summary
119
 
120
+ downloader = YouTubeDownloader()
121
+
122
+ def analyze_video(url):
123
+ info, msg = downloader.get_video_info(url)
124
+ if not info:
125
+ return msg
126
+ return downloader.format_video_info(info)
127
+
128
+ iface = gr.Interface(
129
+ fn=analyze_video,
130
+ inputs=gr.Textbox(label="YouTube URL"),
131
+ outputs=gr.Textbox(label="Gemini-Generated Scene Breakdown", lines=30, show_copy_button=True),
132
+ title="🎬 Gemini Scene Breakdown",
133
+ description="Generates scene-by-scene descriptions using Gemini Flash based on video metadata"
134
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  if __name__ == "__main__":
137
  iface.launch(debug=True)